/* For detailed instructions, see README file */


/* Session options: echo source and macro resolution to the log, compress
   output data sets and reuse freed space, and set SORTSIZE to MAX. */
options source macrogen symbolgen mprint
        compress=yes reuse=yes
        sortsize=max;




/*=========================================*/
/* DLCREATEDIR has to be in effect BEFORE the LIBNAME statement runs;
   otherwise a library directory that does not exist yet is not created. */
options dlcreatedir;
/*Select path where files are saved */
libname OP "INSERT PATH";
/*Select path to save logs*/
%let LOG_PATH=INSERT PATH\Table6_A4.log;
/* Echo the resolved log path so it is visible in the session log. */
%PUT &LOG_PATH;
/*=========================================*/






/* Route the SAS log to the file named by &LOG_PATH, replacing its contents. */
PROC PRINTTO NEW LOG="&LOG_PATH";
RUN;




/* Suppress all ODS output while the intermediate tables are built. */
ODS GRAPHICS OFF;
ODS NORESULTS;
ODS SELECT NONE;
/* Blank out any inherited title and footnote. TITLE is a global statement,
   not an ODS statement, so the original "ODS TITLE" is replaced here. */
TITLE " ";
FOOTNOTE " ";
/*=========================================*/

/* Working copy of OP.OPRA_Pseudo with two derived columns. */
data REG_DATA;
	set OP.OPRA_Pseudo;
	/* interaction of the auction and PFOF indicators */
	AUC_PFOF = AUCTION_IND * PFOF_IND;
	/* character rendering of DATE using the DATE9. format */
	CHARDATE = put(date, date9.);
run;







/* First demeaning pass: every listed variable has its mean within
   SYMBOL_ONLY subtracted (the group mean is remerged onto each row). */
proc sql;
	create table REG_DATA_D1 as
	select
		CHARDATE,
		SYMBOL_ONLY,
		Underlying_Vol      - mean(Underlying_Vol)      as Underlying_Vol,
		PFOF_IND            - mean(PFOF_IND)            as PFOF_IND,
		AUCTION_IND         - mean(AUCTION_IND)         as AUCTION_IND,
		AUC_PFOF            - mean(AUC_PFOF)            as AUC_PFOF,
		DMMP_IND            - mean(DMMP_IND)            as DMMP_IND,
		ARBITRAGE           - mean(ARBITRAGE)           as ARBITRAGE,
		Abs_Delta           - mean(Abs_Delta)           as Abs_Delta,
		INV_STOCK_MIDPOINT  - mean(INV_STOCK_MIDPOINT)  as INV_STOCK_MIDPOINT,
		GAMMA               - mean(GAMMA)               as GAMMA,
		VEGA                - mean(VEGA)                as VEGA,
		INV_OPTION_MIDPOINT - mean(INV_OPTION_MIDPOINT) as INV_OPTION_MIDPOINT,
		CALL_IND            - mean(CALL_IND)            as CALL_IND,
		LOG_DAYS_EXP        - mean(LOG_DAYS_EXP)        as LOG_DAYS_EXP,
		LOG_S_VOL           - mean(LOG_S_VOL)           as LOG_S_VOL,
		TICK_CHANGE_IND     - mean(TICK_CHANGE_IND)     as TICK_CHANGE_IND,
		LOG_MCAP            - mean(LOG_MCAP)            as LOG_MCAP,
		MMRev_C             - mean(MMRev_C)             as MMRev_C,
		PIMP_C              - mean(PIMP_C)              as PIMP_C,
		EQ                  - mean(EQ)                  as EQ,
		QUOTEDSPREAD_C      - mean(QUOTEDSPREAD_C)      as QUOTEDSPREAD_C,
		EFFECTIVESPREAD_C   - mean(EFFECTIVESPREAD_C)   as EFFECTIVESPREAD_C,
		STOCK_QS            - mean(STOCK_QS)            as STOCK_QS,
		BUY_IND             - mean(BUY_IND)             as BUY_IND,
		QS_OPTIONSCALED     - mean(QS_OPTIONSCALED)     as QS_OPTIONSCALED,
		ES_OPTIONSCALED     - mean(ES_OPTIONSCALED)     as ES_OPTIONSCALED,
		PIMP_P              - mean(PIMP_P)              as PIMP_P,
		MMRev_P             - mean(MMRev_P)             as MMRev_P
	from REG_DATA
	group by SYMBOL_ONLY;
quit;



/* Second demeaning pass: the symbol-demeaned columns of REG_DATA_D1 have
   their mean within CHARDATE subtracted. */
proc sql;
	create table REG_DATA_D2 as
	select
		CHARDATE,
		SYMBOL_ONLY,
		Underlying_Vol      - mean(Underlying_Vol)      as Underlying_Vol,
		PFOF_IND            - mean(PFOF_IND)            as PFOF_IND,
		AUCTION_IND         - mean(AUCTION_IND)         as AUCTION_IND,
		AUC_PFOF            - mean(AUC_PFOF)            as AUC_PFOF,
		DMMP_IND            - mean(DMMP_IND)            as DMMP_IND,
		ARBITRAGE           - mean(ARBITRAGE)           as ARBITRAGE,
		Abs_Delta           - mean(Abs_Delta)           as Abs_Delta,
		INV_STOCK_MIDPOINT  - mean(INV_STOCK_MIDPOINT)  as INV_STOCK_MIDPOINT,
		GAMMA               - mean(GAMMA)               as GAMMA,
		VEGA                - mean(VEGA)                as VEGA,
		INV_OPTION_MIDPOINT - mean(INV_OPTION_MIDPOINT) as INV_OPTION_MIDPOINT,
		CALL_IND            - mean(CALL_IND)            as CALL_IND,
		LOG_DAYS_EXP        - mean(LOG_DAYS_EXP)        as LOG_DAYS_EXP,
		LOG_S_VOL           - mean(LOG_S_VOL)           as LOG_S_VOL,
		TICK_CHANGE_IND     - mean(TICK_CHANGE_IND)     as TICK_CHANGE_IND,
		LOG_MCAP            - mean(LOG_MCAP)            as LOG_MCAP,
		MMRev_C             - mean(MMRev_C)             as MMRev_C,
		PIMP_C              - mean(PIMP_C)              as PIMP_C,
		EQ                  - mean(EQ)                  as EQ,
		QUOTEDSPREAD_C      - mean(QUOTEDSPREAD_C)      as QUOTEDSPREAD_C,
		EFFECTIVESPREAD_C   - mean(EFFECTIVESPREAD_C)   as EFFECTIVESPREAD_C,
		STOCK_QS            - mean(STOCK_QS)            as STOCK_QS,
		BUY_IND             - mean(BUY_IND)             as BUY_IND,
		QS_OPTIONSCALED     - mean(QS_OPTIONSCALED)     as QS_OPTIONSCALED,
		ES_OPTIONSCALED     - mean(ES_OPTIONSCALED)     as ES_OPTIONSCALED,
		PIMP_P              - mean(PIMP_P)              as PIMP_P,
		MMRev_P             - mean(MMRev_P)             as MMRev_P
	from REG_DATA_D1
	group by CHARDATE;
quit;





/* Keep ODS output suppressed while the regressions run. */
ods select none;
ods noresults;
ods graphics off;
/*=========================================*/

/* Clustered standard errors and p-values for MMREV_C, estimated on the
   twice-demeaned data (REG_DATA_D2) without an intercept. */
/* REG_DATA is sorted by SYMBOL_ONLY for the later PROC GLM step, which
   absorbs SYMBOL_ONLY. */
PROC SORT DATA=REG_DATA; BY SYMBOL_ONLY; RUN;

ODS OUTPUT ParameterEstimates = ParameterEstimates_clus (keep=parameter StdErr probt);
proc surveyreg data=REG_DATA_D2;
	/* NOTE(review): two CLUSTER variables define clusters by their
	   combination, not two-way clustering - confirm this is intended. */
	cluster SYMBOL_ONLY CHARDATE;
	/* TICK_CHANGE_IND appeared twice in the original MODEL statement; the
	   duplicate regressor is removed so each effect is listed once. */
	model MMREV_C =
		PFOF_IND AUCTION_IND AUC_PFOF DMMP_IND
		Abs_Delta INV_STOCK_MIDPOINT
		GAMMA VEGA BUY_IND TICK_CHANGE_IND
		INV_OPTION_MIDPOINT CALL_IND LOG_DAYS_EXP LOG_S_VOL
		LOG_MCAP
		/ noint;
run;






/* Split the clustered output into a standard-error table (Type "S", StdErr
   renamed to Value) and a p-value table. Rows whose parameter name contains
   SYMBOL_ONLY or CHARDATE are dropped; remaining names are upper-cased. */
data ParameterEstimates_s_clus (keep=parameter value Type)
     ParameterEstimates_p_clus (keep=parameter probt);
	set ParameterEstimates_clus;
	if find(parameter, "SYMBOL_ONLY") = 0 and find(parameter, "CHARDATE") = 0;
	parameter = upcase(parameter);
	Type = "S";
	rename StdErr = Value;
run;




/* Point estimates, fit statistics and observation counts for MMREV_C.
   SYMBOL_ONLY is absorbed and CHARDATE enters as a class effect. */
ODS OUTPUT ParameterEstimates = ParameterEstimates;
ODS OUTPUT FitStatistics = FitStatistics;
ODS OUTPUT NObs = NObs;
proc glm namelen=64 data=REG_DATA;
	class CHARDATE;
	absorb SYMBOL_ONLY;
	/* TICK_CHANGE_IND appeared twice in the original MODEL statement; the
	   duplicate regressor is removed so each effect is listed once. */
	model MMREV_C =
		PFOF_IND AUCTION_IND AUC_PFOF DMMP_IND
		Abs_Delta INV_STOCK_MIDPOINT
		GAMMA VEGA BUY_IND TICK_CHANGE_IND
		INV_OPTION_MIDPOINT CALL_IND LOG_DAYS_EXP LOG_S_VOL
		LOG_MCAP
		CHARDATE
		/ solution noint;
run;
quit;




/* R-squared as a single "Rsq" row of Type "E". */
data FitStatistics (keep=parameter value Type);
	set FitStatistics;
	rename RSquare = value;
	parameter = "Rsq";
	type = "E";
	if find(parameter, "SYMBOL_ONLY") = 0 and find(parameter, "CHARDATE") = 0;
run;

/* Coefficient estimates (Type "E"); rows whose parameter name contains
   SYMBOL_ONLY or CHARDATE are dropped and names are upper-cased. */
data ParameterEstimates_e (keep=parameter value Type);
	set ParameterEstimates;
	rename Estimate = value;
	Type = "E";
	if find(parameter, "SYMBOL_ONLY") = 0 and find(parameter, "CHARDATE") = 0;
	parameter = upcase(parameter);
run;

/* Keep only the "Number of Observations Read" row, labelled "NObs". */
data nobs (keep=value type parameter);
	set nobs;
	type = "E";
	if label1 = "Number of Observations Read";
	parameter = "NObs";
	rename NObsRead = value;
run;



/* Stack estimates, clustered standard errors, R-squared and N, and assign
   each parameter its display position n. */
data Regression_MR;
	set ParameterEstimates_e parameterEstimates_s_clus FITSTATISTICS nobs;
	select (parameter);
		when ("AUCTION_IND")         n = 1;
		when ("PFOF_IND")            n = 2;
		when ("AUC_PFOF")            n = 3;
		when ("DMMP_IND")            n = 4;
		when ("ABS_DELTA")           n = 5;
		when ("GAMMA")               n = 6;
		when ("VEGA")                n = 7;
		when ("UNDERLYING_VOL")      n = 8;
		when ("STOCK_QS")            n = 9;
		when ("INV_OPTION_MIDPOINT") n = 10;
		when ("INV_STOCK_MIDPOINT")  n = 11;
		when ("LOG_MCAP")            n = 12;
		when ("LOG_S_VOL")           n = 13;
		when ("CALL_IND")            n = 14;
		when ("LOG_DAYS_EXP")        n = 15;
		when ("TICK_CHANGE_IND")     n = 16;
		when ("BUY_IND")             n = 17;
		when ("Rsq")                 n = 18;
		when ("NObs")                n = 19;
		otherwise; /* unlisted parameters keep n missing */
	end;
run;

/* Attach the clustered p-value to every row with a matching parameter. */
proc sql;
	create table Regression_MR as
	select a.*, b.Probt
	from Regression_MR as A
	left join ParameterEstimates_p_clus as B
		on A.parameter = b.parameter;
quit;







/* Clustered standard errors and p-values for QUOTEDSPREAD_C, estimated on
   the twice-demeaned data (REG_DATA_D2) without an intercept. */
PROC SORT DATA=REG_DATA; BY SYMBOL_ONLY; RUN;

ODS OUTPUT ParameterEstimates = ParameterEstimates_clus (keep=parameter StdErr probt);
proc surveyreg data=REG_DATA_D2;
	/* NOTE(review): two CLUSTER variables define clusters by their
	   combination, not two-way clustering - confirm this is intended. */
	cluster SYMBOL_ONLY CHARDATE;
	/* TICK_CHANGE_IND appeared twice in the original MODEL statement; the
	   duplicate regressor is removed so each effect is listed once. */
	model QUOTEDSPREAD_C =
		PFOF_IND AUCTION_IND AUC_PFOF DMMP_IND
		Abs_Delta INV_STOCK_MIDPOINT Underlying_Vol STOCK_QS
		GAMMA VEGA BUY_IND TICK_CHANGE_IND
		INV_OPTION_MIDPOINT CALL_IND LOG_DAYS_EXP LOG_S_VOL
		LOG_MCAP
		/ noint;
run;






/* Split the clustered output into a standard-error table (Type "S", StdErr
   renamed to Value) and a p-value table. Rows whose parameter name contains
   SYMBOL_ONLY or CHARDATE are dropped; remaining names are upper-cased. */
data ParameterEstimates_s_clus (keep=parameter value Type)
     ParameterEstimates_p_clus (keep=parameter probt);
	set ParameterEstimates_clus;
	if find(parameter, "SYMBOL_ONLY") = 0 and find(parameter, "CHARDATE") = 0;
	parameter = upcase(parameter);
	Type = "S";
	rename StdErr = Value;
run;




/* Point estimates, fit statistics and observation counts for
   QUOTEDSPREAD_C. SYMBOL_ONLY is absorbed (hence the sort) and CHARDATE
   enters as a class effect. */
PROC SORT DATA=REG_DATA; BY SYMBOL_ONLY; RUN;
ODS OUTPUT ParameterEstimates = ParameterEstimates;
ODS OUTPUT FitStatistics = FitStatistics;
ODS OUTPUT NObs = NObs;
proc glm namelen=64 data=REG_DATA;
	class CHARDATE;
	absorb SYMBOL_ONLY;
	/* TICK_CHANGE_IND appeared twice in the original MODEL statement; the
	   duplicate regressor is removed so each effect is listed once. */
	model QUOTEDSPREAD_C =
		PFOF_IND AUCTION_IND AUC_PFOF DMMP_IND
		Abs_Delta INV_STOCK_MIDPOINT Underlying_Vol STOCK_QS
		GAMMA VEGA BUY_IND TICK_CHANGE_IND
		INV_OPTION_MIDPOINT CALL_IND LOG_DAYS_EXP LOG_S_VOL
		LOG_MCAP
		CHARDATE
		/ solution noint;
run;
quit;




/* R-squared as a single "Rsq" row of Type "E". */
data FitStatistics (keep=parameter value Type);
	set FitStatistics;
	rename RSquare = value;
	parameter = "Rsq";
	type = "E";
	if find(parameter, "SYMBOL_ONLY") = 0 and find(parameter, "CHARDATE") = 0;
run;

/* Coefficient estimates (Type "E"); rows whose parameter name contains
   SYMBOL_ONLY or CHARDATE are dropped and names are upper-cased. */
data ParameterEstimates_e (keep=parameter value Type);
	set ParameterEstimates;
	rename Estimate = value;
	Type = "E";
	if find(parameter, "SYMBOL_ONLY") = 0 and find(parameter, "CHARDATE") = 0;
	parameter = upcase(parameter);
run;

/* Keep only the "Number of Observations Read" row, labelled "NObs". */
data nobs (keep=value type parameter);
	set nobs;
	type = "E";
	if label1 = "Number of Observations Read";
	parameter = "NObs";
	rename NObsRead = value;
run;



/* Stack estimates, clustered standard errors, R-squared and N, and assign
   each parameter its display position n. */
data Regression_QS;
	set ParameterEstimates_e parameterEstimates_s_clus FITSTATISTICS nobs;
	select (parameter);
		when ("AUCTION_IND")         n = 1;
		when ("PFOF_IND")            n = 2;
		when ("AUC_PFOF")            n = 3;
		when ("DMMP_IND")            n = 4;
		when ("ABS_DELTA")           n = 5;
		when ("GAMMA")               n = 6;
		when ("VEGA")                n = 7;
		when ("UNDERLYING_VOL")      n = 8;
		when ("STOCK_QS")            n = 9;
		when ("INV_OPTION_MIDPOINT") n = 10;
		when ("INV_STOCK_MIDPOINT")  n = 11;
		when ("LOG_MCAP")            n = 12;
		when ("LOG_S_VOL")           n = 13;
		when ("CALL_IND")            n = 14;
		when ("LOG_DAYS_EXP")        n = 15;
		when ("TICK_CHANGE_IND")     n = 16;
		when ("BUY_IND")             n = 17;
		when ("Rsq")                 n = 18;
		when ("NObs")                n = 19;
		otherwise; /* unlisted parameters keep n missing */
	end;
run;

/* Attach the clustered p-value to every row with a matching parameter. */
proc sql;
	create table Regression_QS as
	select a.*, b.Probt
	from Regression_QS as A
	left join ParameterEstimates_p_clus as B
		on A.parameter = b.parameter;
quit;






/* Clustered standard errors and p-values for EFFECTIVESPREAD_C, estimated
   on the twice-demeaned data (REG_DATA_D2) without an intercept. */
/* REG_DATA is sorted by SYMBOL_ONLY for the later PROC GLM step, which
   absorbs SYMBOL_ONLY. */
PROC SORT DATA=REG_DATA; BY SYMBOL_ONLY; RUN;

ODS OUTPUT ParameterEstimates = ParameterEstimates_clus (keep=parameter StdErr probt);
proc surveyreg data=REG_DATA_D2;
	/* NOTE(review): two CLUSTER variables define clusters by their
	   combination, not two-way clustering - confirm this is intended. */
	cluster SYMBOL_ONLY CHARDATE;
	/* TICK_CHANGE_IND appeared twice in the original MODEL statement; the
	   duplicate regressor is removed so each effect is listed once. */
	model EFFECTIVESPREAD_C =
		PFOF_IND AUCTION_IND AUC_PFOF DMMP_IND
		Abs_Delta INV_STOCK_MIDPOINT Underlying_Vol STOCK_QS
		GAMMA VEGA BUY_IND TICK_CHANGE_IND
		INV_OPTION_MIDPOINT CALL_IND LOG_DAYS_EXP LOG_S_VOL
		LOG_MCAP
		/ noint;
run;





/* Split the clustered output into a standard-error table (Type "S", StdErr
   renamed to Value) and a p-value table. Rows whose parameter name contains
   SYMBOL_ONLY or CHARDATE are dropped; remaining names are upper-cased. */
data ParameterEstimates_s_clus (keep=parameter value Type)
     ParameterEstimates_p_clus (keep=parameter probt);
	set ParameterEstimates_clus;
	if find(parameter, "SYMBOL_ONLY") = 0 and find(parameter, "CHARDATE") = 0;
	parameter = upcase(parameter);
	Type = "S";
	rename StdErr = Value;
run;


/* Point estimates, fit statistics and observation counts for
   EFFECTIVESPREAD_C. SYMBOL_ONLY is absorbed and CHARDATE enters as a
   class effect. */
ODS OUTPUT ParameterEstimates = ParameterEstimates;
ODS OUTPUT FitStatistics = FitStatistics;
ODS OUTPUT NObs = NObs;
proc glm namelen=64 data=REG_DATA;
	class CHARDATE;
	absorb SYMBOL_ONLY;
	/* TICK_CHANGE_IND appeared twice in the original MODEL statement; the
	   duplicate regressor is removed so each effect is listed once. */
	model EFFECTIVESPREAD_C =
		PFOF_IND AUCTION_IND AUC_PFOF DMMP_IND
		Abs_Delta INV_STOCK_MIDPOINT Underlying_Vol STOCK_QS
		GAMMA VEGA BUY_IND TICK_CHANGE_IND
		INV_OPTION_MIDPOINT CALL_IND LOG_DAYS_EXP LOG_S_VOL
		LOG_MCAP
		CHARDATE
		/ solution noint;
run;
quit;




/* R-squared as a single "Rsq" row of Type "E". */
data FitStatistics (keep=parameter value Type);
	set FitStatistics;
	rename RSquare = value;
	parameter = "Rsq";
	type = "E";
	if find(parameter, "SYMBOL_ONLY") = 0 and find(parameter, "CHARDATE") = 0;
run;

/* Coefficient estimates (Type "E"); rows whose parameter name contains
   SYMBOL_ONLY or CHARDATE are dropped and names are upper-cased. */
data ParameterEstimates_e (keep=parameter value Type);
	set ParameterEstimates;
	rename Estimate = value;
	Type = "E";
	if find(parameter, "SYMBOL_ONLY") = 0 and find(parameter, "CHARDATE") = 0;
	parameter = upcase(parameter);
run;

/* Keep only the "Number of Observations Read" row, labelled "NObs". */
data nobs (keep=value type parameter);
	set nobs;
	type = "E";
	if label1 = "Number of Observations Read";
	parameter = "NObs";
	rename NObsRead = value;
run;



/* Stack estimates, clustered standard errors, R-squared and N, and assign
   each parameter its display position n. */
data Regression_ES;
	set ParameterEstimates_e parameterEstimates_s_clus FITSTATISTICS nobs;
	select (parameter);
		when ("AUCTION_IND")         n = 1;
		when ("PFOF_IND")            n = 2;
		when ("AUC_PFOF")            n = 3;
		when ("DMMP_IND")            n = 4;
		when ("ABS_DELTA")           n = 5;
		when ("GAMMA")               n = 6;
		when ("VEGA")                n = 7;
		when ("UNDERLYING_VOL")      n = 8;
		when ("STOCK_QS")            n = 9;
		when ("INV_OPTION_MIDPOINT") n = 10;
		when ("INV_STOCK_MIDPOINT")  n = 11;
		when ("LOG_MCAP")            n = 12;
		when ("LOG_S_VOL")           n = 13;
		when ("CALL_IND")            n = 14;
		when ("LOG_DAYS_EXP")        n = 15;
		when ("TICK_CHANGE_IND")     n = 16;
		when ("BUY_IND")             n = 17;
		when ("Rsq")                 n = 18;
		when ("NObs")                n = 19;
		otherwise; /* unlisted parameters keep n missing */
	end;
run;

/* Attach the clustered p-value to every row with a matching parameter. */
proc sql;
	create table Regression_ES as
	select a.*, b.Probt
	from Regression_ES as A
	left join ParameterEstimates_p_clus as B
		on A.parameter = b.parameter;
quit;













/* Clustered standard errors and p-values for PIMP_C, estimated on the
   twice-demeaned data (REG_DATA_D2) without an intercept. */
/* REG_DATA is sorted by SYMBOL_ONLY for the later PROC GLM step, which
   absorbs SYMBOL_ONLY. */
PROC SORT DATA=REG_DATA; BY SYMBOL_ONLY; RUN;

ODS OUTPUT ParameterEstimates = ParameterEstimates_clus (keep=parameter StdErr probt);
proc surveyreg data=REG_DATA_D2;
	/* NOTE(review): two CLUSTER variables define clusters by their
	   combination, not two-way clustering - confirm this is intended. */
	cluster SYMBOL_ONLY CHARDATE;
	/* TICK_CHANGE_IND appeared twice in the original MODEL statement; the
	   duplicate regressor is removed so each effect is listed once. */
	model PIMP_C =
		PFOF_IND AUCTION_IND AUC_PFOF DMMP_IND
		Abs_Delta INV_STOCK_MIDPOINT Underlying_Vol STOCK_QS
		GAMMA VEGA BUY_IND TICK_CHANGE_IND
		INV_OPTION_MIDPOINT CALL_IND LOG_DAYS_EXP LOG_S_VOL
		LOG_MCAP
		/ noint;
run;





/* Split the clustered output into a standard-error table (Type "S", StdErr
   renamed to Value) and a p-value table. Rows whose parameter name contains
   SYMBOL_ONLY or CHARDATE are dropped; remaining names are upper-cased. */
data ParameterEstimates_s_clus (keep=parameter value Type)
     ParameterEstimates_p_clus (keep=parameter probt);
	set ParameterEstimates_clus;
	if find(parameter, "SYMBOL_ONLY") = 0 and find(parameter, "CHARDATE") = 0;
	parameter = upcase(parameter);
	Type = "S";
	rename StdErr = Value;
run;


/* Point estimates, fit statistics and observation counts for PIMP_C.
   SYMBOL_ONLY is absorbed and CHARDATE enters as a class effect. */
ODS OUTPUT ParameterEstimates = ParameterEstimates;
ODS OUTPUT FitStatistics = FitStatistics;
ODS OUTPUT NObs = NObs;
proc glm namelen=64 data=REG_DATA;
	class CHARDATE;
	absorb SYMBOL_ONLY;
	/* TICK_CHANGE_IND appeared twice in the original MODEL statement; the
	   duplicate regressor is removed so each effect is listed once. */
	model PIMP_C =
		PFOF_IND AUCTION_IND AUC_PFOF DMMP_IND
		Abs_Delta INV_STOCK_MIDPOINT Underlying_Vol STOCK_QS
		GAMMA VEGA BUY_IND TICK_CHANGE_IND
		INV_OPTION_MIDPOINT CALL_IND LOG_DAYS_EXP LOG_S_VOL
		LOG_MCAP
		CHARDATE
		/ solution noint;
run;
quit;




/* R-squared as a single "Rsq" row of Type "E". */
data FitStatistics (keep=parameter value Type);
	set FitStatistics;
	rename RSquare = value;
	parameter = "Rsq";
	type = "E";
	if find(parameter, "SYMBOL_ONLY") = 0 and find(parameter, "CHARDATE") = 0;
run;

/* Coefficient estimates (Type "E"). Rows whose parameter name contains any
   of the listed substrings are dropped; remaining names are upper-cased. */
data ParameterEstimates_e (keep=parameter value Type);
	set ParameterEstimates;
	rename Estimate = value;
	Type = "E";
	if  find(parameter, "SYMBOL_ONLY") = 0
	and find(parameter, "CHARDATE")    = 0
	and find(parameter, "sender")      = 0
	and find(parameter, "SENDER")      = 0
	and find(parameter, "PFOF_Model")  = 0
	and find(parameter, "DIRECTED_OF") = 0;
	parameter = upcase(parameter);
run;

/* Keep only the "Number of Observations Read" row, labelled "NObs". */
data nobs (keep=value type parameter);
	set nobs;
	type = "E";
	if label1 = "Number of Observations Read";
	parameter = "NObs";
	rename NObsRead = value;
run;



/* Stack estimates, clustered standard errors, R-squared and N, and assign
   each parameter its display position n. */
data Regression_PIMP_C;
	set ParameterEstimates_e parameterEstimates_s_clus FITSTATISTICS nobs;
	select (parameter);
		when ("AUCTION_IND")         n = 1;
		when ("PFOF_IND")            n = 2;
		when ("AUC_PFOF")            n = 3;
		when ("DMMP_IND")            n = 4;
		when ("ABS_DELTA")           n = 5;
		when ("GAMMA")               n = 6;
		when ("VEGA")                n = 7;
		when ("UNDERLYING_VOL")      n = 8;
		when ("STOCK_QS")            n = 9;
		when ("INV_OPTION_MIDPOINT") n = 10;
		when ("INV_STOCK_MIDPOINT")  n = 11;
		when ("LOG_MCAP")            n = 12;
		when ("LOG_S_VOL")           n = 13;
		when ("CALL_IND")            n = 14;
		when ("LOG_DAYS_EXP")        n = 15;
		when ("TICK_CHANGE_IND")     n = 16;
		when ("BUY_IND")             n = 17;
		when ("Rsq")                 n = 18;
		when ("NObs")                n = 19;
		otherwise; /* unlisted parameters keep n missing */
	end;
run;

/* Attach the clustered p-value to every row with a matching parameter. */
proc sql;
	create table Regression_PIMP_C as
	select a.*, b.Probt
	from Regression_PIMP_C as A
	left join ParameterEstimates_p_clus as B
		on A.parameter = b.parameter;
quit;







/* Clustered standard errors and p-values for EQ, estimated on the
   twice-demeaned data (REG_DATA_D2) without an intercept. */
/* REG_DATA is sorted by SYMBOL_ONLY for the later PROC GLM step, which
   absorbs SYMBOL_ONLY. */
PROC SORT DATA=REG_DATA; BY SYMBOL_ONLY; RUN;

ODS OUTPUT ParameterEstimates = ParameterEstimates_clus (keep=parameter StdErr probt);
proc surveyreg data=REG_DATA_D2;
	/* NOTE(review): two CLUSTER variables define clusters by their
	   combination, not two-way clustering - confirm this is intended. */
	cluster SYMBOL_ONLY CHARDATE;
	/* TICK_CHANGE_IND appeared twice in the original MODEL statement; the
	   duplicate regressor is removed so each effect is listed once. */
	model EQ =
		PFOF_IND AUCTION_IND AUC_PFOF DMMP_IND
		Abs_Delta INV_STOCK_MIDPOINT Underlying_Vol STOCK_QS
		GAMMA VEGA BUY_IND TICK_CHANGE_IND
		INV_OPTION_MIDPOINT CALL_IND LOG_DAYS_EXP LOG_S_VOL
		LOG_MCAP
		/ noint;
run;





/* Split the clustered output into a standard-error table (Type "S", StdErr
   renamed to Value) and a p-value table. Rows whose parameter name contains
   SYMBOL_ONLY or CHARDATE are dropped; remaining names are upper-cased. */
data ParameterEstimates_s_clus (keep=parameter value Type)
     ParameterEstimates_p_clus (keep=parameter probt);
	set ParameterEstimates_clus;
	if find(parameter, "SYMBOL_ONLY") = 0 and find(parameter, "CHARDATE") = 0;
	parameter = upcase(parameter);
	Type = "S";
	rename StdErr = Value;
run;


/* Point estimates, fit statistics and observation counts for EQ.
   SYMBOL_ONLY is absorbed and CHARDATE enters as a class effect. */
ODS OUTPUT ParameterEstimates = ParameterEstimates;
ODS OUTPUT FitStatistics = FitStatistics;
ODS OUTPUT NObs = NObs;
proc glm namelen=64 data=REG_DATA;
	class CHARDATE;
	absorb SYMBOL_ONLY;
	/* TICK_CHANGE_IND appeared twice in the original MODEL statement; the
	   duplicate regressor is removed so each effect is listed once. */
	model EQ =
		PFOF_IND AUCTION_IND AUC_PFOF DMMP_IND
		Abs_Delta INV_STOCK_MIDPOINT Underlying_Vol STOCK_QS
		GAMMA VEGA BUY_IND TICK_CHANGE_IND
		INV_OPTION_MIDPOINT CALL_IND LOG_DAYS_EXP LOG_S_VOL
		LOG_MCAP
		CHARDATE
		/ solution noint;
run;
quit;



/* R-squared as a single "Rsq" row of Type "E". */
data FitStatistics (keep=parameter value Type);
	set FitStatistics;
	rename RSquare = value;
	parameter = "Rsq";
	type = "E";
	if find(parameter, "SYMBOL_ONLY") = 0 and find(parameter, "CHARDATE") = 0;
run;

/* Coefficient estimates (Type "E"). Rows whose parameter name contains any
   of the listed substrings are dropped; remaining names are upper-cased. */
data ParameterEstimates_e (keep=parameter value Type);
	set ParameterEstimates;
	rename Estimate = value;
	Type = "E";
	if  find(parameter, "SYMBOL_ONLY") = 0
	and find(parameter, "CHARDATE")    = 0
	and find(parameter, "sender")      = 0
	and find(parameter, "SENDER")      = 0
	and find(parameter, "PFOF_Model")  = 0
	and find(parameter, "DIRECTED_OF") = 0;
	parameter = upcase(parameter);
run;

/* Keep only the "Number of Observations Read" row, labelled "NObs". */
data nobs (keep=value type parameter);
	set nobs;
	type = "E";
	if label1 = "Number of Observations Read";
	parameter = "NObs";
	rename NObsRead = value;
run;



/* Stack estimates, clustered standard errors, R-squared and N, and assign
   each parameter its display position n. */
data Regression_EQ;
	set ParameterEstimates_e parameterEstimates_s_clus FITSTATISTICS nobs;
	select (parameter);
		when ("AUCTION_IND")         n = 1;
		when ("PFOF_IND")            n = 2;
		when ("AUC_PFOF")            n = 3;
		when ("DMMP_IND")            n = 4;
		when ("ABS_DELTA")           n = 5;
		when ("GAMMA")               n = 6;
		when ("VEGA")                n = 7;
		when ("UNDERLYING_VOL")      n = 8;
		when ("STOCK_QS")            n = 9;
		when ("INV_OPTION_MIDPOINT") n = 10;
		when ("INV_STOCK_MIDPOINT")  n = 11;
		when ("LOG_MCAP")            n = 12;
		when ("LOG_S_VOL")           n = 13;
		when ("CALL_IND")            n = 14;
		when ("LOG_DAYS_EXP")        n = 15;
		when ("TICK_CHANGE_IND")     n = 16;
		when ("BUY_IND")             n = 17;
		when ("Rsq")                 n = 18;
		when ("NObs")                n = 19;
		otherwise; /* unlisted parameters keep n missing */
	end;
run;

/* Attach the clustered p-value to every row with a matching parameter. */
proc sql;
	create table Regression_EQ as
	select a.*, b.Probt
	from Regression_EQ as A
	left join ParameterEstimates_p_clus as B
		on A.parameter = b.parameter;
quit;







/* Place the five regression result tables side by side. REGRESSION_QS
   drives the row set; the others are left-joined on parameter and type. */
proc sql;
	create table Regression_Combined as
	select
		A.VALUE as VALUE_QS,    A.PROBT as PROBT_QS,    A.N as N_QS,
		A.PARAMETER as PARAMETER_QS,    A.TYPE as TYPE_QS,
		B.VALUE as VALUE_ES,    B.PROBT as PROBT_ES,    B.N as N_ES,
		B.PARAMETER as PARAMETER_ES,    B.TYPE as TYPE_ES,
		C.VALUE as VALUE_PIMP,  C.PROBT as PROBT_PIMP,  C.N as N_PIMP,
		C.PARAMETER as PARAMETER_PIMP,  C.TYPE as TYPE_PIMP,
		D.VALUE as VALUE_EQ,    D.PROBT as PROBT_EQ,    D.N as N_EQ,
		D.PARAMETER as PARAMETER_EQ,    D.TYPE as TYPE_EQ,
		E.VALUE as VALUE_MMREV, E.PROBT as PROBT_MMREV, E.N as N_MMREV,
		E.PARAMETER as PARAMETER_MMREV, E.TYPE as TYPE_MMREV
	from REGRESSION_QS as A
	left join REGRESSION_ES as B
		on A.PARAMETER = B.PARAMETER and A.TYPE = B.TYPE
	left join REGRESSION_PIMP_C as C
		on A.PARAMETER = C.PARAMETER and A.TYPE = C.TYPE
	left join REGRESSION_EQ as D
		on A.PARAMETER = D.PARAMETER and A.TYPE = D.TYPE
	left join REGRESSION_MR as E
		on A.PARAMETER = E.PARAMETER and A.TYPE = E.TYPE
	;
quit;



/* Turn the side-by-side regression results into printable text columns.
   For each row this step sets:
     VARIABLE      - display label (LaTeX markup), blank on standard-error rows
     VALUE_CHAR_*  - estimate with a significance macro appended, standard
                     error in parentheses, or the plain value for Rsq / NObs
     char_2dash    - the LaTeX row terminator "\\"
   Changes relative to the original step:
     * a p-value of exactly 0.1 now prints as a plain estimate (the original
       tested <0.1 and >0.1 only, leaving that case blank);
     * a missing p-value no longer earns three stars (missing sorts below
       0.01 in SAS comparisons);
     * all five output columns share one length ($48) and one rule set; the
       QS column gains the missing-value guard the other four already had;
     * the mistyped "PTICK_CHANGE_IND" test and the repeated AUC_PFOF lines
       are gone (standard-error rows get a blank label in one place). */
data Regression_Combined;
	set Regression_Combined;

	length VARIABLE $48 char_2dash $12
	       VALUE_CHAR_QS VALUE_CHAR_ES VALUE_CHAR_PIMP VALUE_CHAR_EQ VALUE_CHAR_MMREV $48
	       _stars $8;
	drop _k _stars;

	/* Parallel arrays over the five outcome columns. */
	array _val  {5} VALUE_QS  VALUE_ES  VALUE_PIMP  VALUE_EQ  VALUE_MMREV;
	array _prob {5} PROBT_QS  PROBT_ES  PROBT_PIMP  PROBT_EQ  PROBT_MMREV;
	array _type {5} TYPE_QS   TYPE_ES   TYPE_PIMP   TYPE_EQ   TYPE_MMREV;
	array _out  {5} VALUE_CHAR_QS VALUE_CHAR_ES VALUE_CHAR_PIMP VALUE_CHAR_EQ VALUE_CHAR_MMREV;

	/* Sort-key overrides for two rows (the table is later sorted by N_QS). */
	if PARAMETER_QS = "AUCTION_IND" then N_QS = 0.9;
	if PARAMETER_QS = "AUC_PFOF"    then N_QS = 2.9;

	/* Row label: only estimate rows carry one. */
	VARIABLE = " ";
	if TYPE_QS = "E" then do;
		select (PARAMETER_QS);
			when ("PFOF_IND")            VARIABLE = "PFOF";
			when ("AUCTION_IND")         VARIABLE = "Auction";
			when ("QUOTEDSPREAD_C")      VARIABLE = "Option Quoted Spread";
			when ("AUC_PFOF")            VARIABLE = "Auction $\times$ PFOF";
			when ("DMMP_IND")            VARIABLE = "DMMP";
			when ("VEGA")                VARIABLE = "Vega";
			when ("ABS_DELTA")           VARIABLE = "$\vert$Delta$\vert$";
			when ("GAMMA")               VARIABLE = "Gamma";
			when ("TICK_CHANGE_IND")     VARIABLE = "Tick Size";
			when ("INV_OPTION_MIDPOINT") VARIABLE = "1/Option Midpoint";
			when ("INV_STOCK_MIDPOINT")  VARIABLE = "1/Underlying Midpoint";
			when ("CALL_IND")            VARIABLE = "Call";
			when ("STOCK_QS")            VARIABLE = "Underlying Quoted Spr.";
			when ("LOG_S_VOL")           VARIABLE = "Trade size";
			when ("BUY_IND")             VARIABLE = "Buy";
			when ("ARBITRAGE")           VARIABLE = "Arbitrage$_{t-1}$";
			when ("LOG_DAYS_EXP")        VARIABLE = "Days-to-Expiry";
			when ("UNDERLYING_VOL")      VARIABLE = "Underlying Volatility$_{t-1}$";
			when ("LOG_MCAP")            VARIABLE = "Underlying MCAP";
			when ("LOG_SIZE_VOL")        VARIABLE = "Trade size";
			when ("Rsq")                 VARIABLE = "Rsq";
			otherwise;
		end;
	end;
	/* As in the original, the NObs label does not depend on the row type. */
	if PARAMETER_QS = "NObs" then VARIABLE = "NObs";

	char_2dash = "\\";

	if TYPE_QS = "S" then do;
		/* Standard-error rows: blank label, value in parentheses. */
		VARIABLE = " ";
		do _k = 1 to dim(_val);
			if not missing(_val{_k}) then
				_out{_k} = cats("(", put(_val{_k}, 8.3), ")");
		end;
	end;
	else if VARIABLE in ("Rsq", "NObs") then do _k = 1 to dim(_val);
		/* Summary rows: plain number, no stars. */
		_out{_k} = put(_val{_k}, 8.3);
	end;
	else do _k = 1 to dim(_val);
		/* Estimate rows: append the significance macro chosen by p-value. */
		if _type{_k} = "E" and not missing(_val{_k}) then do;
			if missing(_prob{_k})      then _stars = " ";
			else if _prob{_k} < 0.01   then _stars = "\threeS";
			else if _prob{_k} < 0.05   then _stars = "\twoS";
			else if _prob{_k} < 0.1    then _stars = "\oneS";
			else                            _stars = " ";

			if _stars = " " then _out{_k} = put(_val{_k}, 8.3);
			else _out{_k} = cats(put(_val{_k}, 8.3), _stars);
		end;
	end;
run;


/* Order the rows for display: by position key, then row type. */
proc sort data=Regression_Combined; by n_QS TYPE_QS; run;

/* Re-enable ODS output just for the report. The title is set with the
   global TITLE statement (the original used "ODS TITLE", which is not a
   SAS statement, so the table title would not have been applied). */
ODS RESULTS;
ODS SELECT ALL;
ODS GRAPHICS OFF;
TITLE "TABLE 6:Execution Quality, Auctions and PFOF DMMs (Pseudo Data) - PANEL A";
FOOTNOTE "This table presents regression results for execution quality using pseudo OPRA data.";
PROC REPORT data=Regression_Combined;
	columns
		variable
		value_char_QS
		value_char_ES
		value_char_PIMP
		value_char_EQ
		value_char_MMREV;

	DEFINE VARIABLE         / DISPLAY "Variable"              format=$168.;
	DEFINE value_char_QS    / DISPLAY "Quoted Spread"         format=$168.;
	DEFINE value_char_ES    / DISPLAY "Effective Spread"      format=$168.;
	DEFINE value_char_PIMP  / DISPLAY "Price Improvement"     format=$168.;
	DEFINE value_char_EQ    / DISPLAY "EQ"                    format=$168.;
	DEFINE value_char_MMREV / DISPLAY "Market Maker Revenues" format=$168.;
run;
/* Suppress ODS output again and clear the title and footnote. */
ODS SELECT NONE;
ODS NORESULTS;
ODS GRAPHICS OFF;
TITLE " ";
FOOTNOTE " ";
/*=========================================*/






/*Continuous DATA FOR PANEL B*/



/* Non-auction rows of REG_DATA (AUCTION_IND = 0), flagged "YES"/"NO" by
   whether DMM_IDENTITY is one of the four listed names. */
data INPUT_DATA;
	set REG_DATA;
	if AUCTION_IND = 0;

	if not missing(DMM_IDENTITY)
	   and DMM_IDENTITY in ("Citadel", "Susquehanna", "Wolverine", "Dash")
		then PFOF_DMM_2 = put("YES", $3.);
	else PFOF_DMM_2 = put("NO", $3.);
run;



/* Assign each continuous trade to 5-second buckets used to match it to
   nearby auctions.
   TIME_POST is the start of the NEXT 5-second interval; TIME_PRE is the
   start of the CURRENT one. The original computed both with an offset of 1,
   making them identical, so half of the four OR conditions in the later
   join were redundant and trades in the bucket following an auction could
   never match. With offsets 1 and 0 the join covers the bucket before, the
   same bucket, and the bucket after the auction.
   NOTE(review): this changes the matched sample - confirm against the
   intended matching window. */
data INPUT_DATA;
	set INPUT_DATA;

	TIME_POST = intnx('second5', time, 1, 'BEGINNING');
	TIME_PRE  = intnx('second5', time, 0, 'BEGINNING');

	keep SYMBOL_ONLY DATE TIME_POST CALL_IND UNIQUE_ID PFOF_DMM_2 TIME_PRE TIME;
run;



/* Auction rows of REG_DATA (AUCTION_IND = 1) with their 5-second buckets:
   TIME_POST is the start of the interval containing TIME, TIME_PRE the
   start of the following interval. */
data AUCTIONS;
	set REG_DATA;
	if AUCTION_IND = 1;

	TIME_PRE  = intnx('second5', time, 1, 'BEGINNING');
	TIME_POST = intnx('second5', time, 0, 'BEGINNING');

	keep SYMBOL_ONLY DATE TIME_PRE TIME_POST CALL_IND TIME;
run;
/* Pair each continuous trade with auctions on the same symbol, option type
   and date whose time buckets coincide, recording the absolute time gap. */
proc sql;
	create table INPUT_DATA as
	select distinct
		A.*,
		abs(B.TIME - A.TIME) as TIME_DIFF
	from INPUT_DATA as A
	inner join AUCTIONS as B
		on  A.SYMBOL_ONLY = B.SYMBOL_ONLY
		and A.CALL_IND    = B.CALL_IND
		and A.DATE        = B.DATE
		and (   B.TIME_POST = A.TIME_POST
		     or B.TIME_PRE  = A.TIME_POST
		     or B.TIME_POST = A.TIME_PRE
		     or B.TIME_PRE  = A.TIME_PRE);
quit;

/* For each UNIQUE_ID keep the row with the smallest TIME_DIFF, then drop
   it if that gap exceeds 5. */
proc sort data=INPUT_DATA;
	by UNIQUE_ID TIME_DIFF;
run;

data INPUT_DATA;
	set INPUT_DATA;
	by UNIQUE_ID TIME_DIFF;
	if first.UNIQUE_ID;               /* closest auction per trade */
	if TIME_DIFF > 5 then delete;     /* too far from any auction  */
run;

/* Pull the full REG_DATA rows for the UNIQUE_IDs that survived matching. */
proc sql;
	create table INPUT_DATA as
	select A.*
	from REG_DATA as A
	inner join INPUT_DATA as B
		on A.UNIQUE_ID = B.UNIQUE_ID;
quit;


/* Keep matched rows with non-missing matching covariates and rebuild the
   "YES"/"NO" DMM flag. */
/* NOTE(review): OPTION_MIDPOINT is only derived (from INV_OPTION_MIDPOINT)
   in a later step. If REG_DATA does not already carry OPTION_MIDPOINT (and
   S_VOL), they are missing here and this filter removes every row -
   verify against the input data. */
data INPUT_DATA;
	set INPUT_DATA;

	if  not missing(ABS_DELTA)
	and not missing(OPTION_MIDPOINT)
	and not missing(S_VOL);

	if not missing(DMM_IDENTITY)
	   and DMM_IDENTITY in ("Citadel", "Susquehanna", "Wolverine", "Dash")
		then PFOF_DMM_2 = put("YES", $3.);
	else PFOF_DMM_2 = put("NO", $3.);
run;

/* Count "YES" and "NO" rows per symbol / date / option type and keep only
   groups with more than 3 "NO" rows and at least 2 "YES" rows. */
proc sql;
	create table INPUT_DATA as
	select
		*,
		sum(PFOF_DMM_2 = "YES") as PFOF_SUM,
		sum(PFOF_DMM_2 = "NO")  as NPFOF_SUM
	from INPUT_DATA
	group by SYMBOL_ONLY, DATE, CALL_IND
	having NPFOF_SUM > 3 and PFOF_SUM >= 2;
quit;




/* Recover the midpoints as reciprocals of the stored inverse midpoints. */
data INPUT_DATA;
	set INPUT_DATA;
	STOCK_MIDPOINT  = 1 / INV_STOCK_MIDPOINT;
	OPTION_MIDPOINT = 1 / INV_OPTION_MIDPOINT;
run;
	


/* Propensity-score matching of "YES" rows to "NO" rows among continuous
   trades, run separately for each symbol / date / option type. */
proc sort tagsort data=INPUT_DATA;
	by SYMBOL_ONLY DATE CALL_IND;
run;

proc psmatch data=INPUT_DATA region=TREATED;
	by SYMBOL_ONLY DATE CALL_IND;
	class PFOF_DMM_2;
	psmodel PFOF_DMM_2(Treated='YES') = OPTION_MIDPOINT S_VOL ABS_DELTA;
	/* greedy 1-to-1 matching on the logit propensity score */
	match method=GREEDY(k=1) stat=lps caliper=0.25;
	assess lps var=(OPTION_MIDPOINT S_VOL ABS_DELTA) / weight=none;
	/* only matched observations are written out */
	output out(obs=match)=MATCHES_LOB_ALL lps=_Lps matchid=_MatchID;
run;










/* Auction rows of REG_DATA (AUCTION_IND = 1) with the "YES"/"NO" DMM flag
   and the midpoints recovered from their stored reciprocals. */
data INPUT_DATA_2;
	set REG_DATA;
	if AUCTION_IND = 1;

	if not missing(DMM_IDENTITY)
	   and DMM_IDENTITY in ("Citadel", "Susquehanna", "Wolverine", "Dash")
		then PFOF_DMM_2 = put("YES", $3.);
	else PFOF_DMM_2 = put("NO", $3.);

	STOCK_MIDPOINT  = 1 / INV_STOCK_MIDPOINT;
	OPTION_MIDPOINT = 1 / INV_OPTION_MIDPOINT;
run;
	

/* Keep only symbol / date / option-type groups that contain at least two
   "NO" rows and at least two "YES" rows. */
proc sort tagsort data=INPUT_DATA_2;
	by SYMBOL_ONLY DATE CALL_IND;
run;

proc sql;
	create table INPUT_DATA_2 as
	select *
	from INPUT_DATA_2
	group by SYMBOL_ONLY, DATE, CALL_IND
	having sum(PFOF_DMM_2 = 'NO')  >= 2
	   and sum(PFOF_DMM_2 = 'YES') >= 2;
quit;

/* Propensity-score matching of "YES" rows to "NO" rows among auction
   trades, run separately for each symbol / date / option type. */
proc psmatch data=INPUT_DATA_2 region=TREATED;
	by SYMBOL_ONLY DATE CALL_IND;
	class PFOF_DMM_2;
	psmodel PFOF_DMM_2(Treated='YES') = OPTION_MIDPOINT S_VOL ABS_DELTA;
	/* greedy 1-to-1 matching on the logit propensity score */
	match method=GREEDY(k=1) stat=lps caliper=0.25;
	assess lps var=(OPTION_MIDPOINT S_VOL ABS_DELTA) / weight=none;
	/* only matched observations are written out */
	output out(obs=match)=MATCHES_AUC lps=_Lps matchid=_MatchID;
run;

















/* Stack the two matched samples into one regression dataset. */
data REG_DATA_MATCHED;
	set MATCHES_LOB_ALL matches_auc;
run;

/* First demeaning pass: subtract the within-SYMBOL_ONLY mean from every
   regression variable (the group means are remerged onto each row). */
proc sql;
	create table REG_DATA_MATCHED_D1 as
	select
		CHARDATE,
		SYMBOL_ONLY,
		Underlying_Vol      - mean(Underlying_Vol)      as Underlying_Vol,
		PFOF_IND            - mean(PFOF_IND)            AS PFOF_IND,
		AUCTION_IND         - mean(AUCTION_IND)         AS AUCTION_IND,
		AUC_PFOF            - mean(AUC_PFOF)            AS AUC_PFOF,
		DMMP_IND            - mean(DMMP_IND)            AS DMMP_IND,
		ARBITRAGE           - mean(ARBITRAGE)           AS ARBITRAGE,
		Abs_Delta           - mean(Abs_Delta)           AS Abs_Delta,
		INV_STOCK_MIDPOINT  - mean(INV_STOCK_MIDPOINT)  AS INV_STOCK_MIDPOINT,
		GAMMA               - mean(GAMMA)               AS GAMMA,
		VEGA                - mean(VEGA)                AS VEGA,
		INV_OPTION_MIDPOINT - mean(INV_OPTION_MIDPOINT) as INV_OPTION_MIDPOINT,
		CALL_IND            - mean(CALL_IND)            AS CALL_IND,
		LOG_DAYS_EXP        - mean(LOG_DAYS_EXP)        AS LOG_DAYS_EXP,
		LOG_S_VOL           - mean(LOG_S_VOL)           AS LOG_S_VOL,
		TICK_CHANGE_IND     - mean(TICK_CHANGE_IND)     AS TICK_CHANGE_IND,
		LOG_MCAP            - mean(LOG_MCAP)            AS LOG_MCAP,
		MMRev_C             - mean(MMRev_C)             AS MMRev_C,
		PIMP_C              - mean(PIMP_C)              AS PIMP_C,
		EQ                  - mean(EQ)                  AS EQ,
		QUOTEDSPREAD_C      - mean(QUOTEDSPREAD_C)      AS QUOTEDSPREAD_C,
		EFFECTIVESPREAD_C   - mean(EFFECTIVESPREAD_C)   AS EFFECTIVESPREAD_C,
		STOCK_QS            - mean(STOCK_QS)            AS STOCK_QS,
		BUY_IND             - mean(BUY_IND)             AS BUY_IND,
		QS_OPTIONSCALED     - mean(QS_OPTIONSCALED)     AS QS_OPTIONSCALED,
		ES_OPTIONSCALED     - mean(ES_OPTIONSCALED)     AS ES_OPTIONSCALED,
		PIMP_P              - mean(PIMP_P)              AS PIMP_P,
		MMRev_P             - mean(MMRev_P)             AS MMRev_P
	from REG_DATA_MATCHED
	group by SYMBOL_ONLY;
quit;



/* Second demeaning pass: take the symbol-demeaned data and subtract the
   within-CHARDATE mean from every regression variable. The result feeds the
   PROC SURVEYREG runs that follow. */
proc sql;
	create table REG_DATA_MATCHED_D2 as
	select
		CHARDATE,
		SYMBOL_ONLY,
		Underlying_Vol      - mean(Underlying_Vol)      as Underlying_Vol,
		PFOF_IND            - mean(PFOF_IND)            AS PFOF_IND,
		AUCTION_IND         - mean(AUCTION_IND)         AS AUCTION_IND,
		AUC_PFOF            - mean(AUC_PFOF)            AS AUC_PFOF,
		DMMP_IND            - mean(DMMP_IND)            AS DMMP_IND,
		ARBITRAGE           - mean(ARBITRAGE)           AS ARBITRAGE,
		Abs_Delta           - mean(Abs_Delta)           AS Abs_Delta,
		INV_STOCK_MIDPOINT  - mean(INV_STOCK_MIDPOINT)  AS INV_STOCK_MIDPOINT,
		GAMMA               - mean(GAMMA)               AS GAMMA,
		VEGA                - mean(VEGA)                AS VEGA,
		INV_OPTION_MIDPOINT - mean(INV_OPTION_MIDPOINT) as INV_OPTION_MIDPOINT,
		CALL_IND            - mean(CALL_IND)            AS CALL_IND,
		LOG_DAYS_EXP        - mean(LOG_DAYS_EXP)        AS LOG_DAYS_EXP,
		LOG_S_VOL           - mean(LOG_S_VOL)           AS LOG_S_VOL,
		TICK_CHANGE_IND     - mean(TICK_CHANGE_IND)     AS TICK_CHANGE_IND,
		LOG_MCAP            - mean(LOG_MCAP)            AS LOG_MCAP,
		MMRev_C             - mean(MMRev_C)             AS MMRev_C,
		PIMP_C              - mean(PIMP_C)              AS PIMP_C,
		EQ                  - mean(EQ)                  AS EQ,
		QUOTEDSPREAD_C      - mean(QUOTEDSPREAD_C)      AS QUOTEDSPREAD_C,
		EFFECTIVESPREAD_C   - mean(EFFECTIVESPREAD_C)   AS EFFECTIVESPREAD_C,
		STOCK_QS            - mean(STOCK_QS)            AS STOCK_QS,
		BUY_IND             - mean(BUY_IND)             AS BUY_IND,
		QS_OPTIONSCALED     - mean(QS_OPTIONSCALED)     AS QS_OPTIONSCALED,
		ES_OPTIONSCALED     - mean(ES_OPTIONSCALED)     AS ES_OPTIONSCALED,
		PIMP_P              - mean(PIMP_P)              AS PIMP_P,
		MMRev_P             - mean(MMRev_P)             AS MMRev_P
	from REG_DATA_MATCHED_D1
	group by CHARDATE;
quit;







/* ---------------------------------------------------------------------------
   Matched sample, dependent variable MMREV_C  ->  Regression_MR
   1. PROC SURVEYREG on the demeaned data supplies clustered standard errors
      and p-values.
   2. PROC GLM (ABSORB SYMBOL_ONLY, CHARDATE class effect) supplies the
      coefficient estimates, R-square and observation count.
   3. The pieces are stacked, given a display order n, and the p-values are
      joined on by parameter name.
   TICK_CHANGE_IND used to be listed twice in both MODEL statements; it now
   appears once so no redundant parameter row can reach the joins below.
   --------------------------------------------------------------------------- */

/* PROC GLM's ABSORB statement needs the data sorted by the absorbed variable. */
PROC SORT DATA=REG_DATA_MATCHED; BY SYMBOL_ONLY;RUN;

ODS OUTPUT ParameterEstimates = ParameterEstimates_clus (keep=parameter StdErr probt);
proc surveyreg data=REG_DATA_MATCHED_D2;
	cluster SYMBOL_ONLY CHARDATE ;
	model MMREV_C =
		PFOF_IND AUCTION_IND AUC_PFOF DMMP_IND
		Abs_Delta INV_STOCK_MIDPOINT GAMMA VEGA
		BUY_IND TICK_CHANGE_IND INV_OPTION_MIDPOINT CALL_IND
		LOG_DAYS_EXP LOG_S_VOL LOG_MCAP
		/ NOINT;
run;

/* Standard errors (Type = "S"), fixed-effect rows removed. */
data ParameterEstimates_s_clus(keep=parameter value Type);
	set ParameterEstimates_clus;
	Type ="S";
	rename StdErr = Value;
	IF find(parameter, "SYMBOL_ONLY")=0;
	IF find(parameter, "CHARDATE")=0;
	parameter = UPCASE(parameter);
run;

/* p-values keyed by upper-cased parameter name. */
data ParameterEstimates_p_clus(keep=parameter probt);
	set ParameterEstimates_clus;
	IF find(parameter, "SYMBOL_ONLY")=0;
	IF find(parameter, "CHARDATE")=0;
	parameter = UPCASE(parameter);
run;

ODS OUTPUT ParameterEstimates = ParameterEstimates;
ODS OUTPUT FitStatistics = FitStatistics;
ODS OUTPUT NObs = NObs;
proc glm namelen=64  data=REG_DATA_MATCHED;
	CLASS CHARDATE;
	ABSORB SYMBOL_ONLY;
	model MMREV_C =
		PFOF_IND AUCTION_IND AUC_PFOF DMMP_IND
		Abs_Delta INV_STOCK_MIDPOINT GAMMA VEGA
		BUY_IND TICK_CHANGE_IND INV_OPTION_MIDPOINT CALL_IND
		LOG_DAYS_EXP LOG_S_VOL LOG_MCAP
		CHARDATE
		/ solution noint;
run;
quit;

/* R-square as a single "Rsq" row. (The former SYMBOL_ONLY/CHARDATE filters
   tested the constant "Rsq" and could never remove a row, so they are gone.) */
data FitStatistics (keep=parameter value Type);
	set FitStatistics;
	rename  RSquare=value;
	parameter = "Rsq";
	type ="E";
run;

/* Coefficient estimates (Type = "E"), fixed-effect rows removed. */
data ParameterEstimates_e (keep=parameter value Type);
	set ParameterEstimates;
	rename Estimate=value;
	Type="E";
	IF find(parameter, "SYMBOL_ONLY")=0;
	IF find(parameter, "CHARDATE")=0;
	parameter = UPCASE(parameter);
run;

/* Observation count as a single "NObs" row. */
data nobs(keep=value type parameter);
	set nobs;
	type = "E";
	if label1 = "Number of Observations Read";
	parameter = "NObs";
	rename NObsRead = value;
run;

/* Stack everything and assign the row order used in the final table. */
data Regression_MR;
	set ParameterEstimates_e parameterEstimates_s_clus FITSTATISTICS nobs;

	select (parameter);
		when ("AUCTION_IND")         n=1;
		when ("PFOF_IND")            n=2;
		when ("AUC_PFOF")            n=3;
		when ("DMMP_IND")            n=4;
		when ("ABS_DELTA")           n=5;
		when ("GAMMA")               n=6;
		when ("VEGA")                n=7;
		when ("UNDERLYING_VOL")      n=8;
		when ("STOCK_QS")            n=9;
		when ("INV_OPTION_MIDPOINT") n=10;
		when ("INV_STOCK_MIDPOINT")  n=11;
		when ("LOG_MCAP")            n=12;
		when ("LOG_S_VOL")           n=13;
		when ("CALL_IND")            n=14;
		when ("LOG_DAYS_EXP")        n=15;
		when ("TICK_CHANGE_IND")     n=16;
		when ("BUY_IND")             n=17;
		when ("Rsq")                 n=18;
		when ("NObs")                n=19;
		otherwise;
	end;
run;

proc sql;
	create table Regression_MR
	as select a.*, b.Probt
	from Regression_MR A left join ParameterEstimates_p_clus B
	ON A.parameter = b.parameter;
quit;







/* ---------------------------------------------------------------------------
   Matched sample, dependent variable QUOTEDSPREAD_C  ->  Regression_QS
   1. PROC SURVEYREG on the demeaned data supplies clustered standard errors
      and p-values.
   2. PROC GLM (ABSORB SYMBOL_ONLY, CHARDATE class effect) supplies the
      coefficient estimates, R-square and observation count.
   3. The pieces are stacked, given a display order n, and the p-values are
      joined on by parameter name.
   TICK_CHANGE_IND used to be listed twice in both MODEL statements; it now
   appears once so no redundant parameter row can reach the joins below.
   --------------------------------------------------------------------------- */

/* PROC GLM's ABSORB statement needs the data sorted by the absorbed variable;
   one sort at the top covers the whole section. */
PROC SORT DATA=REG_DATA_MATCHED; BY SYMBOL_ONLY;RUN;

ODS OUTPUT ParameterEstimates = ParameterEstimates_clus (keep=parameter StdErr probt);
proc surveyreg data=REG_DATA_MATCHED_D2;
	cluster SYMBOL_ONLY CHARDATE ;
	model QUOTEDSPREAD_C =
		PFOF_IND AUCTION_IND AUC_PFOF DMMP_IND
		Abs_Delta INV_STOCK_MIDPOINT Underlying_Vol STOCK_QS
		GAMMA VEGA BUY_IND TICK_CHANGE_IND
		INV_OPTION_MIDPOINT CALL_IND LOG_DAYS_EXP LOG_S_VOL LOG_MCAP
		/ NOINT;
run;

/* Standard errors (Type = "S"), fixed-effect rows removed. */
data ParameterEstimates_s_clus(keep=parameter value Type);
	set ParameterEstimates_clus;
	Type ="S";
	rename StdErr = Value;
	IF find(parameter, "SYMBOL_ONLY")=0;
	IF find(parameter, "CHARDATE")=0;
	parameter = UPCASE(parameter);
run;

/* p-values keyed by upper-cased parameter name. */
data ParameterEstimates_p_clus(keep=parameter probt);
	set ParameterEstimates_clus;
	IF find(parameter, "SYMBOL_ONLY")=0;
	IF find(parameter, "CHARDATE")=0;
	parameter = UPCASE(parameter);
run;

ODS OUTPUT ParameterEstimates = ParameterEstimates;
ODS OUTPUT FitStatistics = FitStatistics;
ODS OUTPUT NObs = NObs;
proc glm namelen=64  data=REG_DATA_MATCHED;
	CLASS CHARDATE;
	ABSORB SYMBOL_ONLY;
	model QUOTEDSPREAD_C =
		PFOF_IND AUCTION_IND AUC_PFOF DMMP_IND
		Abs_Delta INV_STOCK_MIDPOINT Underlying_Vol STOCK_QS
		GAMMA VEGA BUY_IND TICK_CHANGE_IND
		INV_OPTION_MIDPOINT CALL_IND LOG_DAYS_EXP LOG_S_VOL LOG_MCAP
		CHARDATE
		/ solution noint;
run;
quit;

/* R-square as a single "Rsq" row. (The former SYMBOL_ONLY/CHARDATE filters
   tested the constant "Rsq" and could never remove a row, so they are gone.) */
data FitStatistics (keep=parameter value Type);
	set FitStatistics;
	rename  RSquare=value;
	parameter = "Rsq";
	type ="E";
run;

/* Coefficient estimates (Type = "E"), fixed-effect rows removed. */
data ParameterEstimates_e (keep=parameter value Type);
	set ParameterEstimates;
	rename Estimate=value;
	Type="E";
	IF find(parameter, "SYMBOL_ONLY")=0;
	IF find(parameter, "CHARDATE")=0;
	parameter = UPCASE(parameter);
run;

/* Observation count as a single "NObs" row. */
data nobs(keep=value type parameter);
	set nobs;
	type = "E";
	if label1 = "Number of Observations Read";
	parameter = "NObs";
	rename NObsRead = value;
run;

/* Stack everything and assign the row order used in the final table. */
data Regression_QS;
	set ParameterEstimates_e parameterEstimates_s_clus FITSTATISTICS nobs;

	select (parameter);
		when ("AUCTION_IND")         n=1;
		when ("PFOF_IND")            n=2;
		when ("AUC_PFOF")            n=3;
		when ("DMMP_IND")            n=4;
		when ("ABS_DELTA")           n=5;
		when ("GAMMA")               n=6;
		when ("VEGA")                n=7;
		when ("UNDERLYING_VOL")      n=8;
		when ("STOCK_QS")            n=9;
		when ("INV_OPTION_MIDPOINT") n=10;
		when ("INV_STOCK_MIDPOINT")  n=11;
		when ("LOG_MCAP")            n=12;
		when ("LOG_S_VOL")           n=13;
		when ("CALL_IND")            n=14;
		when ("LOG_DAYS_EXP")        n=15;
		when ("TICK_CHANGE_IND")     n=16;
		when ("BUY_IND")             n=17;
		when ("Rsq")                 n=18;
		when ("NObs")                n=19;
		otherwise;
	end;
run;

proc sql;
	create table Regression_QS
	as select a.*, b.Probt
	from Regression_QS A left join ParameterEstimates_p_clus B
	ON A.parameter = b.parameter;
quit;






/* ---------------------------------------------------------------------------
   Matched sample, dependent variable EFFECTIVESPREAD_C  ->  Regression_ES
   1. PROC SURVEYREG on the demeaned data supplies clustered standard errors
      and p-values.
   2. PROC GLM (ABSORB SYMBOL_ONLY, CHARDATE class effect) supplies the
      coefficient estimates, R-square and observation count.
   3. The pieces are stacked, given a display order n, and the p-values are
      joined on by parameter name.
   TICK_CHANGE_IND used to be listed twice in both MODEL statements; it now
   appears once so no redundant parameter row can reach the joins below.
   --------------------------------------------------------------------------- */

/* PROC GLM's ABSORB statement needs the data sorted by the absorbed variable. */
PROC SORT DATA=REG_DATA_MATCHED; BY SYMBOL_ONLY;RUN;

ODS OUTPUT ParameterEstimates = ParameterEstimates_clus (keep=parameter StdErr probt);
proc surveyreg data=REG_DATA_MATCHED_D2;
	cluster SYMBOL_ONLY CHARDATE ;
	model EFFECTIVESPREAD_C =
		PFOF_IND AUCTION_IND AUC_PFOF DMMP_IND
		Abs_Delta INV_STOCK_MIDPOINT Underlying_Vol STOCK_QS
		GAMMA VEGA BUY_IND TICK_CHANGE_IND
		INV_OPTION_MIDPOINT CALL_IND LOG_DAYS_EXP LOG_S_VOL LOG_MCAP
		/ NOINT;
run;

/* Standard errors (Type = "S"), fixed-effect rows removed. */
data ParameterEstimates_s_clus(keep=parameter value Type);
	set ParameterEstimates_clus;
	Type ="S";
	rename StdErr = Value;
	IF find(parameter, "SYMBOL_ONLY")=0;
	IF find(parameter, "CHARDATE")=0;
	parameter = UPCASE(parameter);
run;

/* p-values keyed by upper-cased parameter name. */
data ParameterEstimates_p_clus(keep=parameter probt);
	set ParameterEstimates_clus;
	IF find(parameter, "SYMBOL_ONLY")=0;
	IF find(parameter, "CHARDATE")=0;
	parameter = UPCASE(parameter);
run;

ODS OUTPUT ParameterEstimates = ParameterEstimates;
ODS OUTPUT FitStatistics = FitStatistics;
ODS OUTPUT NObs = NObs;
proc glm namelen=64  data=REG_DATA_MATCHED;
	CLASS CHARDATE;
	ABSORB SYMBOL_ONLY;
	model EFFECTIVESPREAD_C =
		PFOF_IND AUCTION_IND AUC_PFOF DMMP_IND
		Abs_Delta INV_STOCK_MIDPOINT Underlying_Vol STOCK_QS
		GAMMA VEGA BUY_IND TICK_CHANGE_IND
		INV_OPTION_MIDPOINT CALL_IND LOG_DAYS_EXP LOG_S_VOL LOG_MCAP
		CHARDATE
		/ solution noint;
run;
quit;

/* R-square as a single "Rsq" row. (The former SYMBOL_ONLY/CHARDATE filters
   tested the constant "Rsq" and could never remove a row, so they are gone.) */
data FitStatistics (keep=parameter value Type);
	set FitStatistics;
	rename  RSquare=value;
	parameter = "Rsq";
	type ="E";
run;

/* Coefficient estimates (Type = "E"), fixed-effect rows removed. */
data ParameterEstimates_e (keep=parameter value Type);
	set ParameterEstimates;
	rename Estimate=value;
	Type="E";
	IF find(parameter, "SYMBOL_ONLY")=0;
	IF find(parameter, "CHARDATE")=0;
	parameter = UPCASE(parameter);
run;

/* Observation count as a single "NObs" row. */
data nobs(keep=value type parameter);
	set nobs;
	type = "E";
	if label1 = "Number of Observations Read";
	parameter = "NObs";
	rename NObsRead = value;
run;

/* Stack everything and assign the row order used in the final table. */
data Regression_ES;
	set ParameterEstimates_e parameterEstimates_s_clus FITSTATISTICS nobs;

	select (parameter);
		when ("AUCTION_IND")         n=1;
		when ("PFOF_IND")            n=2;
		when ("AUC_PFOF")            n=3;
		when ("DMMP_IND")            n=4;
		when ("ABS_DELTA")           n=5;
		when ("GAMMA")               n=6;
		when ("VEGA")                n=7;
		when ("UNDERLYING_VOL")      n=8;
		when ("STOCK_QS")            n=9;
		when ("INV_OPTION_MIDPOINT") n=10;
		when ("INV_STOCK_MIDPOINT")  n=11;
		when ("LOG_MCAP")            n=12;
		when ("LOG_S_VOL")           n=13;
		when ("CALL_IND")            n=14;
		when ("LOG_DAYS_EXP")        n=15;
		when ("TICK_CHANGE_IND")     n=16;
		when ("BUY_IND")             n=17;
		when ("Rsq")                 n=18;
		when ("NObs")                n=19;
		otherwise;
	end;
run;

proc sql;
	create table Regression_ES
	as select a.*, b.Probt
	from Regression_ES A left join ParameterEstimates_p_clus B
	ON A.parameter = b.parameter;
quit;













/* ---------------------------------------------------------------------------
   Matched sample, dependent variable PIMP_C  ->  Regression_PIMP_C
   1. PROC SURVEYREG on the demeaned data supplies clustered standard errors
      and p-values.
   2. PROC GLM (ABSORB SYMBOL_ONLY, CHARDATE class effect) supplies the
      coefficient estimates, R-square and observation count.
   3. The pieces are stacked, given a display order n, and the p-values are
      joined on by parameter name.
   TICK_CHANGE_IND used to be listed twice in both MODEL statements; it now
   appears once so no redundant parameter row can reach the joins below.
   --------------------------------------------------------------------------- */

/* PROC GLM's ABSORB statement needs the data sorted by the absorbed variable. */
PROC SORT DATA=REG_DATA_MATCHED; BY SYMBOL_ONLY;RUN;

ODS OUTPUT ParameterEstimates = ParameterEstimates_clus (keep=parameter StdErr probt);
proc surveyreg data=REG_DATA_MATCHED_D2;
	cluster SYMBOL_ONLY CHARDATE ;
	model PIMP_C =
		PFOF_IND AUCTION_IND AUC_PFOF DMMP_IND
		Abs_Delta INV_STOCK_MIDPOINT Underlying_Vol STOCK_QS
		GAMMA VEGA BUY_IND TICK_CHANGE_IND
		INV_OPTION_MIDPOINT CALL_IND LOG_DAYS_EXP LOG_S_VOL LOG_MCAP
		/ NOINT;
run;

/* Standard errors (Type = "S"), fixed-effect rows removed. */
data ParameterEstimates_s_clus(keep=parameter value Type);
	set ParameterEstimates_clus;
	Type ="S";
	rename StdErr = Value;
	IF find(parameter, "SYMBOL_ONLY")=0;
	IF find(parameter, "CHARDATE")=0;
	parameter = UPCASE(parameter);
run;

/* p-values keyed by upper-cased parameter name. */
data ParameterEstimates_p_clus(keep=parameter probt);
	set ParameterEstimates_clus;
	IF find(parameter, "SYMBOL_ONLY")=0;
	IF find(parameter, "CHARDATE")=0;
	parameter = UPCASE(parameter);
run;

ODS OUTPUT ParameterEstimates = ParameterEstimates;
ODS OUTPUT FitStatistics = FitStatistics;
ODS OUTPUT NObs = NObs;
proc glm namelen=64  data=REG_DATA_MATCHED;
	CLASS CHARDATE;
	ABSORB SYMBOL_ONLY;
	model PIMP_C =
		PFOF_IND AUCTION_IND AUC_PFOF DMMP_IND
		Abs_Delta INV_STOCK_MIDPOINT Underlying_Vol STOCK_QS
		GAMMA VEGA BUY_IND TICK_CHANGE_IND
		INV_OPTION_MIDPOINT CALL_IND LOG_DAYS_EXP LOG_S_VOL LOG_MCAP
		CHARDATE
		/ solution noint;
run;
quit;

/* R-square as a single "Rsq" row. (The former SYMBOL_ONLY/CHARDATE filters
   tested the constant "Rsq" and could never remove a row, so they are gone.) */
data FitStatistics (keep=parameter value Type);
	set FitStatistics;
	rename  RSquare=value;
	parameter = "Rsq";
	type ="E";
run;

/* Coefficient estimates (Type = "E"); rows whose parameter name contains any
   of the listed substrings are removed. */
data ParameterEstimates_e (keep=parameter value Type);
	set ParameterEstimates;
	rename Estimate=value;
	Type="E";
	IF find(parameter, "SYMBOL_ONLY")=0;
	IF find(parameter, "CHARDATE")=0;
	IF find(parameter, "sender")=0;
	IF find(parameter, "SENDER")=0;
	IF find(parameter,"PFOF_Model")=0;
	IF find(parameter,"DIRECTED_OF")=0;
	parameter = UPCASE(parameter);
run;

/* Observation count as a single "NObs" row. */
data nobs(keep=value type parameter);
	set nobs;
	type = "E";
	if label1 = "Number of Observations Read";
	parameter = "NObs";
	rename NObsRead = value;
run;

/* Stack everything and assign the row order used in the final table. */
data Regression_PIMP_C;
	set ParameterEstimates_e parameterEstimates_s_clus FITSTATISTICS nobs;

	select (parameter);
		when ("AUCTION_IND")         n=1;
		when ("PFOF_IND")            n=2;
		when ("AUC_PFOF")            n=3;
		when ("DMMP_IND")            n=4;
		when ("ABS_DELTA")           n=5;
		when ("GAMMA")               n=6;
		when ("VEGA")                n=7;
		when ("UNDERLYING_VOL")      n=8;
		when ("STOCK_QS")            n=9;
		when ("INV_OPTION_MIDPOINT") n=10;
		when ("INV_STOCK_MIDPOINT")  n=11;
		when ("LOG_MCAP")            n=12;
		when ("LOG_S_VOL")           n=13;
		when ("CALL_IND")            n=14;
		when ("LOG_DAYS_EXP")        n=15;
		when ("TICK_CHANGE_IND")     n=16;
		when ("BUY_IND")             n=17;
		when ("Rsq")                 n=18;
		when ("NObs")                n=19;
		otherwise;
	end;
run;

proc sql;
	create table Regression_PIMP_C
	as select a.*, b.Probt
	from Regression_PIMP_C A left join ParameterEstimates_p_clus B
	ON A.parameter = b.parameter;
quit;







/* ---------------------------------------------------------------------------
   Matched sample, dependent variable EQ  ->  Regression_EQ
   1. PROC SURVEYREG on the demeaned data supplies clustered standard errors
      and p-values.
   2. PROC GLM (ABSORB SYMBOL_ONLY, CHARDATE class effect) supplies the
      coefficient estimates, R-square and observation count.
   3. The pieces are stacked, given a display order n, and the p-values are
      joined on by parameter name.
   TICK_CHANGE_IND used to be listed twice in both MODEL statements; it now
   appears once so no redundant parameter row can reach the joins below.
   --------------------------------------------------------------------------- */

/* PROC GLM's ABSORB statement needs the data sorted by the absorbed variable. */
PROC SORT DATA=REG_DATA_MATCHED; BY SYMBOL_ONLY;RUN;

ODS OUTPUT ParameterEstimates = ParameterEstimates_clus (keep=parameter StdErr probt);
proc surveyreg data=REG_DATA_MATCHED_D2;
	cluster SYMBOL_ONLY CHARDATE ;
	model EQ =
		PFOF_IND AUCTION_IND AUC_PFOF DMMP_IND
		Abs_Delta INV_STOCK_MIDPOINT Underlying_Vol STOCK_QS
		GAMMA VEGA BUY_IND TICK_CHANGE_IND
		INV_OPTION_MIDPOINT CALL_IND LOG_DAYS_EXP LOG_S_VOL LOG_MCAP
		/ NOINT;
run;

/* Standard errors (Type = "S"), fixed-effect rows removed. */
data ParameterEstimates_s_clus(keep=parameter value Type);
	set ParameterEstimates_clus;
	Type ="S";
	rename StdErr = Value;
	IF find(parameter, "SYMBOL_ONLY")=0;
	IF find(parameter, "CHARDATE")=0;
	parameter = UPCASE(parameter);
run;

/* p-values keyed by upper-cased parameter name. */
data ParameterEstimates_p_clus(keep=parameter probt);
	set ParameterEstimates_clus;
	IF find(parameter, "SYMBOL_ONLY")=0;
	IF find(parameter, "CHARDATE")=0;
	parameter = UPCASE(parameter);
run;

ODS OUTPUT ParameterEstimates = ParameterEstimates;
ODS OUTPUT FitStatistics = FitStatistics;
ODS OUTPUT NObs = NObs;
proc glm namelen=64  data=REG_DATA_MATCHED;
	CLASS CHARDATE;
	ABSORB SYMBOL_ONLY;
	model EQ =
		PFOF_IND AUCTION_IND AUC_PFOF DMMP_IND
		Abs_Delta INV_STOCK_MIDPOINT Underlying_Vol STOCK_QS
		GAMMA VEGA BUY_IND TICK_CHANGE_IND
		INV_OPTION_MIDPOINT CALL_IND LOG_DAYS_EXP LOG_S_VOL LOG_MCAP
		CHARDATE
		/ solution noint;
run;
quit;

/* R-square as a single "Rsq" row. (The former SYMBOL_ONLY/CHARDATE filters
   tested the constant "Rsq" and could never remove a row, so they are gone.) */
data FitStatistics (keep=parameter value Type);
	set FitStatistics;
	rename  RSquare=value;
	parameter = "Rsq";
	type ="E";
run;

/* Coefficient estimates (Type = "E"); rows whose parameter name contains any
   of the listed substrings are removed. */
data ParameterEstimates_e (keep=parameter value Type);
	set ParameterEstimates;
	rename Estimate=value;
	Type="E";
	IF find(parameter, "SYMBOL_ONLY")=0;
	IF find(parameter, "CHARDATE")=0;
	IF find(parameter, "sender")=0;
	IF find(parameter, "SENDER")=0;
	IF find(parameter,"PFOF_Model")=0;
	IF find(parameter,"DIRECTED_OF")=0;
	parameter = UPCASE(parameter);
run;

/* Observation count as a single "NObs" row. */
data nobs(keep=value type parameter);
	set nobs;
	type = "E";
	if label1 = "Number of Observations Read";
	parameter = "NObs";
	rename NObsRead = value;
run;

/* Stack everything and assign the row order used in the final table. */
data Regression_EQ;
	set ParameterEstimates_e parameterEstimates_s_clus FITSTATISTICS nobs;

	select (parameter);
		when ("AUCTION_IND")         n=1;
		when ("PFOF_IND")            n=2;
		when ("AUC_PFOF")            n=3;
		when ("DMMP_IND")            n=4;
		when ("ABS_DELTA")           n=5;
		when ("GAMMA")               n=6;
		when ("VEGA")                n=7;
		when ("UNDERLYING_VOL")      n=8;
		when ("STOCK_QS")            n=9;
		when ("INV_OPTION_MIDPOINT") n=10;
		when ("INV_STOCK_MIDPOINT")  n=11;
		when ("LOG_MCAP")            n=12;
		when ("LOG_S_VOL")           n=13;
		when ("CALL_IND")            n=14;
		when ("LOG_DAYS_EXP")        n=15;
		when ("TICK_CHANGE_IND")     n=16;
		when ("BUY_IND")             n=17;
		when ("Rsq")                 n=18;
		when ("NObs")                n=19;
		otherwise;
	end;
run;

proc sql;
	create table Regression_EQ
	as select a.*, b.Probt
	from Regression_EQ A left join ParameterEstimates_p_clus B
	ON A.parameter = b.parameter;
quit;







/* Place the five regressions side by side. Regression_QS is the driving table;
   every other regression is left-joined on (parameter, type), so a row that
   exists only in one of the other regressions does not appear. */
proc sql;
	create table Regression_Combined as
	select
		A.VALUE AS VALUE_QS,
		A.PROBT AS PROBT_QS,
		A.N as N_QS,
		A.PARAMETER AS PARAMETER_QS,
		A.TYPE AS TYPE_QS,

		B.VALUE AS VALUE_ES,
		B.PROBT AS PROBT_ES,
		B.N as N_ES,
		B.PARAMETER AS PARAMETER_ES,
		B.TYPE AS TYPE_ES,

		C.VALUE AS VALUE_PIMP,
		C.PROBT AS PROBT_PIMP,
		C.N as N_PIMP,
		C.PARAMETER AS PARAMETER_PIMP,
		C.TYPE AS TYPE_PIMP,

		D.VALUE AS VALUE_EQ,
		D.PROBT AS PROBT_EQ,
		D.N as N_EQ,
		D.PARAMETER AS PARAMETER_EQ,
		D.TYPE AS TYPE_EQ,

		E.VALUE AS VALUE_MMREV,
		E.PROBT AS PROBT_MMREV,
		E.N as N_MMREV,
		E.PARAMETER AS PARAMETER_MMREV,
		E.TYPE AS TYPE_MMREV

	from REGRESSION_QS A
		left join REGRESSION_ES B
			on A.PARAMETER = B.PARAMETER and A.TYPE = B.TYPE
		left join REGRESSION_PIMP_C C
			on A.PARAMETER = C.PARAMETER and A.TYPE = C.TYPE
		left join REGRESSION_EQ D
			on A.PARAMETER = D.PARAMETER and A.TYPE = D.TYPE
		left join REGRESSION_MR E
			on A.PARAMETER = E.PARAMETER and A.TYPE = E.TYPE
	;
quit;



/* Turn the combined regression table into display strings.
   - VARIABLE is the row label, shown on estimate rows only (blank on
     standard-error rows).
   - VALUE_CHAR_<col> holds the formatted cell: an estimate with a LaTeX
     significance macro appended (\threeS p<0.01, \twoS p<0.05, \oneS p<0.1),
     a standard error in parentheses, or a plain number for Rsq / NObs.
   Changes from the previous version:
   - A p-value of exactly 0.1 used to fall through every branch (tests were
     "<0.1" and ">0.1") and left the cell empty; it is now printed unstarred.
   - A missing p-value compares below any number in SAS and therefore used to
     receive three stars; it is now printed unstarred.
   - The Quoted Spread column now has the same missing-value guard as the
     other four columns.
   - The "PTICK_CHANGE_IND" typo and the duplicated AUC_PFOF lines are gone
     (standard-error rows are blanked in one place). */
data Regression_Combined;
	SET Regression_Combined;

	/* Declared in the order the columns were originally created. */
	length VARIABLE $48 char_2dash $12
	       VALUE_CHAR_QS $48 VALUE_CHAR_ES $28
	       VALUE_CHAR_PIMP VALUE_CHAR_EQ VALUE_CHAR_MMREV $48;

	array _val  {5}   VALUE_QS      VALUE_ES      VALUE_PIMP      VALUE_EQ      VALUE_MMREV;
	array _p    {5}   PROBT_QS      PROBT_ES      PROBT_PIMP      PROBT_EQ      PROBT_MMREV;
	array _type {5} $ TYPE_QS       TYPE_ES       TYPE_PIMP       TYPE_EQ       TYPE_MMREV;
	array _out  {5} $ VALUE_CHAR_QS VALUE_CHAR_ES VALUE_CHAR_PIMP VALUE_CHAR_EQ VALUE_CHAR_MMREV;
	drop _i;

	/* Sort-key overrides applied before the final ordering by N_QS. */
	if PARAMETER_QS="AUCTION_IND" THEN N_QS=0.9;
	if PARAMETER_QS="AUC_PFOF" THEN N_QS=2.9;

	/* Row labels for estimate rows. */
	if TYPE_QS = "E" then do;
		select (PARAMETER_QS);
			when ("PFOF_IND")            VARIABLE = "PFOF";
			when ("AUCTION_IND")         VARIABLE = "Auction";
			when ("QUOTEDSPREAD_C")      VARIABLE = "Option Quoted Spread";
			when ("AUC_PFOF")            VARIABLE = "Auction $\times$ PFOF";
			when ("DMMP_IND")            VARIABLE = "DMMP";
			when ("VEGA")                VARIABLE = "Vega";
			when ("ABS_DELTA")           VARIABLE = "$\vert$Delta$\vert$";
			when ("GAMMA")               VARIABLE = "Gamma";
			when ("TICK_CHANGE_IND")     VARIABLE = "Tick Size";
			when ("INV_OPTION_MIDPOINT") VARIABLE = "1/Option Midpoint";
			when ("INV_STOCK_MIDPOINT")  VARIABLE = "1/Underlying Midpoint";
			when ("CALL_IND")            VARIABLE = "Call";
			when ("STOCK_QS")            VARIABLE = "Underlying Quoted Spr.";
			when ("LOG_S_VOL")           VARIABLE = "Trade size";
			when ("BUY_IND")             VARIABLE = "Buy";
			when ("ARBITRAGE")           VARIABLE = "Arbitrage$_{t-1}$";
			when ("LOG_DAYS_EXP")        VARIABLE = "Days-to-Expiry";
			when ("UNDERLYING_VOL")      VARIABLE = "Underlying Volatility$_{t-1}$";
			when ("LOG_MCAP")            VARIABLE = "Underlying MCAP";
			when ("LOG_SIZE_VOL")        VARIABLE = "Trade size";
			when ("Rsq")                 VARIABLE = "Rsq";
			otherwise;
		end;
	end;
	IF PARAMETER_QS = "NObs" THEN VARIABLE = "NObs";

	char_2dash= put("\\",$12.);

	/* Estimates: append the significance macro that matches the p-value. */
	if VARIABLE not in ("Rsq","NObs") then do _i = 1 to 5;
		if _type{_i} = "E" and not missing(_val{_i}) then do;
			if missing(_p{_i}) or _p{_i} >= 0.1 then _out{_i} = PUT(_val{_i},8.3);
			else if _p{_i} < 0.01 then _out{_i} = CATS(PUT(_val{_i},8.3),"\threeS");
			else if _p{_i} < 0.05 then _out{_i} = CATS(PUT(_val{_i},8.3),"\twoS");
			else                       _out{_i} = CATS(PUT(_val{_i},8.3),"\oneS");
		end;
	end;

	/* Standard-error rows: blank label, value in parentheses. */
	if TYPE_QS = "S" then do;
		VARIABLE = " ";
		do _i = 1 to 5;
			if not missing(_val{_i}) then
				_out{_i} = CATS("(",PUT(_val{_i},8.3),")");
		end;
	end;

	/* Fit statistics: plain numbers, no stars. */
	if VARIABLE in ("Rsq","NObs") then do _i = 1 to 5;
		_out{_i} = PUT(_val{_i},8.3);
	end;

RUN;


/* Order rows by display position; within a position the estimate row ("E")
   sorts ahead of its standard-error row ("S"). */
proc sort data=Regression_Combined;
	by n_QS TYPE_QS;
run;

/* Switch ODS output back on for the printed table only. */
ODS SELECT ALL;
ODS RESULTS;
ODS GRAPHICS OFF;
/*=========================================*/
TITLE "TABLE 6:Execution Quality, Auctions and PFOF DMMs (Pseudo Data) - PANEL B";
FOOTNOTE "This table presents regression results for execution quality using pseudo OPRA data.";
proc report data=Regression_Combined;
	columns variable
	        value_char_QS value_char_ES value_char_PIMP
	        value_char_EQ value_char_MMREV;

	define VARIABLE         / display "Variable"              format=$168.;
	define value_char_QS    / display "Quoted Spread"         format=$168.;
	define value_char_ES    / display "Effective Spread"      format=$168.;
	define value_char_PIMP  / display "Price Improvement"     format=$168.;
	define value_char_EQ    / display "EQ"                    format=$168.;
	define value_char_MMREV / display "Market Maker Revenues" format=$168.;
run;

/* Silence ODS again and clear the title/footnote for the steps that follow. */
ODS SELECT NONE;
ODS NORESULTS;
ODS GRAPHICS OFF;
ODS TITLE " ";
FOOTNOTE " ";
/*=========================================*/
/*=========================================*/

















/* RELATIVE TO MIDPOINT APPENDIX A4 TABLE*/

ODS GRAPHICS OFF;
ODS NORESULTS;

/* ---------------------------------------------------------------------------
   Full sample, dependent variable MMREV_P  ->  Regression_MR
   1. PROC SURVEYREG on the demeaned data (REG_DATA_D2) supplies clustered
      standard errors and p-values.
   2. PROC GLM on REG_DATA (ABSORB SYMBOL_ONLY, CHARDATE class effect) supplies
      the coefficient estimates, R-square and observation count.
   3. The pieces are stacked, given a display order n, and the p-values are
      joined on by parameter name.
   TICK_CHANGE_IND used to be listed twice in both MODEL statements; it now
   appears once so no redundant parameter row can reach the joins below.
   --------------------------------------------------------------------------- */

/* PROC GLM's ABSORB statement needs the data sorted by the absorbed variable. */
PROC SORT DATA=REG_DATA; BY SYMBOL_ONLY;RUN;

ODS OUTPUT ParameterEstimates = ParameterEstimates_clus (keep=parameter StdErr probt);
proc surveyreg data=REG_DATA_D2;
	cluster SYMBOL_ONLY CHARDATE ;
	model MMREV_P =
		PFOF_IND AUCTION_IND AUC_PFOF DMMP_IND
		Abs_Delta INV_STOCK_MIDPOINT GAMMA VEGA
		BUY_IND TICK_CHANGE_IND CALL_IND
		LOG_DAYS_EXP LOG_S_VOL LOG_MCAP
		/ NOINT;
run;

/* Standard errors (Type = "S"), fixed-effect rows removed. */
data ParameterEstimates_s_clus(keep=parameter value Type);
	set ParameterEstimates_clus;
	Type ="S";
	rename StdErr = Value;
	IF find(parameter, "SYMBOL_ONLY")=0;
	IF find(parameter, "CHARDATE")=0;
	parameter = UPCASE(parameter);
run;

/* p-values keyed by upper-cased parameter name. */
data ParameterEstimates_p_clus(keep=parameter probt);
	set ParameterEstimates_clus;
	IF find(parameter, "SYMBOL_ONLY")=0;
	IF find(parameter, "CHARDATE")=0;
	parameter = UPCASE(parameter);
run;

ODS OUTPUT ParameterEstimates = ParameterEstimates;
ODS OUTPUT FitStatistics = FitStatistics;
ODS OUTPUT NObs = NObs;
proc glm namelen=64  data=REG_DATA;
	CLASS CHARDATE;
	ABSORB SYMBOL_ONLY;
	model MMREV_P =
		PFOF_IND AUCTION_IND AUC_PFOF DMMP_IND
		Abs_Delta INV_STOCK_MIDPOINT GAMMA VEGA
		BUY_IND TICK_CHANGE_IND CALL_IND
		LOG_DAYS_EXP LOG_S_VOL LOG_MCAP
		CHARDATE
		/ solution noint;
run;
quit;

/* R-square as a single "Rsq" row. (The former SYMBOL_ONLY/CHARDATE filters
   tested the constant "Rsq" and could never remove a row, so they are gone.) */
data FitStatistics (keep=parameter value Type);
	set FitStatistics;
	rename  RSquare=value;
	parameter = "Rsq";
	type ="E";
run;

/* Coefficient estimates (Type = "E"), fixed-effect rows removed. */
data ParameterEstimates_e (keep=parameter value Type);
	set ParameterEstimates;
	rename Estimate=value;
	Type="E";
	IF find(parameter, "SYMBOL_ONLY")=0;
	IF find(parameter, "CHARDATE")=0;
	parameter = UPCASE(parameter);
run;

/* Observation count as a single "NObs" row. */
data nobs(keep=value type parameter);
	set nobs;
	type = "E";
	if label1 = "Number of Observations Read";
	parameter = "NObs";
	rename NObsRead = value;
run;

/* Stack everything and assign the row order used in the final table. */
data Regression_MR;
	set ParameterEstimates_e parameterEstimates_s_clus FITSTATISTICS nobs;

	select (parameter);
		when ("AUCTION_IND")         n=1;
		when ("PFOF_IND")            n=2;
		when ("AUC_PFOF")            n=3;
		when ("DMMP_IND")            n=4;
		when ("ABS_DELTA")           n=5;
		when ("GAMMA")               n=6;
		when ("VEGA")                n=7;
		when ("UNDERLYING_VOL")      n=8;
		when ("STOCK_QS")            n=9;
		when ("INV_OPTION_MIDPOINT") n=10;
		when ("INV_STOCK_MIDPOINT")  n=11;
		when ("LOG_MCAP")            n=12;
		when ("LOG_S_VOL")           n=13;
		when ("CALL_IND")            n=14;
		when ("LOG_DAYS_EXP")        n=15;
		when ("TICK_CHANGE_IND")     n=16;
		when ("BUY_IND")             n=17;
		when ("Rsq")                 n=18;
		when ("NObs")                n=19;
		otherwise;
	end;
run;

proc sql;
	create table Regression_MR
	as select a.*, b.Probt
	from Regression_MR A left join ParameterEstimates_p_clus B
	ON A.parameter = b.parameter;
quit;







/* Clustered standard errors and p-values for the QS_OPTIONSCALED model.
   NOTE(review): the sort targets REG_DATA while the regression reads REG_DATA_D2;
   presumably the sort serves the later PROC GLM ABSORB step — confirm. */
PROC SORT DATA=REG_DATA; BY SYMBOL_ONLY; RUN;
ODS OUTPUT ParameterEstimates = ParameterEstimates_clus (keep=parameter StdErr probt);
proc surveyreg data=REG_DATA_D2;
	cluster SYMBOL_ONLY CHARDATE;
	/* TICK_CHANGE_IND used to be listed twice; it is listed once here so the
	   model carries no redundant, perfectly collinear regressor. */
	model QS_OPTIONSCALED =
		PFOF_IND
		AUCTION_IND
		AUC_PFOF
		DMMP_IND
		Abs_Delta
		INV_STOCK_MIDPOINT
		Underlying_Vol
		STOCK_QS
		GAMMA
		VEGA
		BUY_IND
		TICK_CHANGE_IND
		CALL_IND
		LOG_DAYS_EXP
		LOG_S_VOL
		LOG_MCAP
		/ NOINT;
run;






/* Clustered standard errors: one row per regressor, tagged Type "S", with StdErr exposed as Value. */
data ParameterEstimates_s_clus(keep=parameter value Type);
	set ParameterEstimates_clus(rename=(StdErr=Value));

	/* Discard any row whose name mentions the SYMBOL_ONLY or CHARDATE effects. */
	if find(parameter, "SYMBOL_ONLY") > 0 or find(parameter, "CHARDATE") > 0 then delete;

	Type = "S";
	parameter = UPCASE(parameter);
run;

/* Clustered p-values, keyed by the upper-cased parameter name for the later join. */
data ParameterEstimates_p_clus(keep=parameter probt);
	set ParameterEstimates_clus;

	if find(parameter, "SYMBOL_ONLY") > 0 or find(parameter, "CHARDATE") > 0 then delete;

	parameter = UPCASE(parameter);
run;




/* Point estimates, fit statistics and observation counts for QS_OPTIONSCALED on REG_DATA.
   The data are sorted by SYMBOL_ONLY first because that variable is absorbed below;
   CHARDATE enters as a CLASS effect. */
PROC SORT DATA=REG_DATA; BY SYMBOL_ONLY; RUN;
ODS OUTPUT ParameterEstimates = ParameterEstimates;
ODS OUTPUT FitStatistics = FitStatistics;
ODS OUTPUT NObs = NObs;
proc glm namelen=64  data=REG_DATA;
	CLASS CHARDATE;
	ABSORB SYMBOL_ONLY;
	/* TICK_CHANGE_IND used to be listed twice; it is listed once here so the
	   model carries no redundant, perfectly collinear regressor. */
	model QS_OPTIONSCALED =
		PFOF_IND
		AUCTION_IND
		AUC_PFOF
		DMMP_IND
		Abs_Delta
		INV_STOCK_MIDPOINT
		Underlying_Vol
		STOCK_QS
		GAMMA
		VEGA
		BUY_IND
		TICK_CHANGE_IND
		CALL_IND
		LOG_DAYS_EXP
		LOG_S_VOL
		LOG_MCAP
		CHARDATE
		/ solution noint;
run;
quit;




/* R-square of the GLM fit, reshaped into a single "Rsq" row of Type "E". */
data FitStatistics (keep=parameter value Type);
	set FitStatistics(rename=(RSquare=value));

	parameter = "Rsq";
	type = "E";
	/* Always true here because parameter was just set to "Rsq"; retained from the original step. */
	if find(parameter, "SYMBOL_ONLY") = 0 and find(parameter, "CHARDATE") = 0;
run;

/* GLM coefficient estimates, tagged Type "E", without the SYMBOL_ONLY / CHARDATE rows. */
data ParameterEstimates_e (keep=parameter value Type);
	set ParameterEstimates(rename=(Estimate=value));

	if find(parameter, "SYMBOL_ONLY") > 0 or find(parameter, "CHARDATE") > 0 then delete;

	Type = "E";
	parameter = UPCASE(parameter);
run;

/* Keep only the "Number of Observations Read" row and expose its count as value. */
data nobs(keep=value type parameter);
	set nobs;
	rename NObsRead = value;

	if label1 = "Number of Observations Read";
	type = "E";
	parameter = "NObs";
run;



/* Stack estimates (Type E), clustered standard errors (Type S), R-square and N
   for the QS_OPTIONSCALED model and give each parameter its display rank n. */
data Regression_QS;
	set ParameterEstimates_e parameterEstimates_s_clus FITSTATISTICS nobs;

	select (parameter);
		when ("AUCTION_IND")         n = 1;
		when ("PFOF_IND")            n = 2;
		when ("AUC_PFOF")            n = 3;
		when ("DMMP_IND")            n = 4;
		when ("ABS_DELTA")           n = 5;
		when ("GAMMA")               n = 6;
		when ("VEGA")                n = 7;
		when ("UNDERLYING_VOL")      n = 8;
		when ("STOCK_QS")            n = 9;
		when ("INV_OPTION_MIDPOINT") n = 10;
		when ("INV_STOCK_MIDPOINT")  n = 11;
		when ("LOG_MCAP")            n = 12;
		when ("LOG_S_VOL")           n = 13;
		when ("CALL_IND")            n = 14;
		when ("LOG_DAYS_EXP")        n = 15;
		when ("TICK_CHANGE_IND")     n = 16;
		when ("BUY_IND")             n = 17;
		when ("Rsq")                 n = 18;
		when ("NObs")                n = 19;
		otherwise;  /* any other parameter keeps n missing */
	end;
run;

/* Attach the clustered p-value to every row that shares the parameter name. */
proc sql;
	create table Regression_QS as
	select est.*, pv.Probt
	from Regression_QS as est
	left join ParameterEstimates_p_clus as pv
		on est.parameter = pv.parameter;
quit;






/* Clustered standard errors and p-values for the ES_OPTIONSCALED model.
   NOTE(review): the sort targets REG_DATA while the regression reads REG_DATA_D2;
   presumably the sort serves the later PROC GLM ABSORB step — confirm. */
PROC SORT DATA=REG_DATA; BY SYMBOL_ONLY; RUN;
ODS OUTPUT ParameterEstimates = ParameterEstimates_clus (keep=parameter StdErr probt);
proc surveyreg data=REG_DATA_D2;
	cluster SYMBOL_ONLY CHARDATE;
	/* TICK_CHANGE_IND used to be listed twice; it is listed once here so the
	   model carries no redundant, perfectly collinear regressor. */
	model ES_OPTIONSCALED =
		PFOF_IND
		AUCTION_IND
		AUC_PFOF
		DMMP_IND
		Abs_Delta
		INV_STOCK_MIDPOINT
		Underlying_Vol
		STOCK_QS
		GAMMA
		VEGA
		BUY_IND
		TICK_CHANGE_IND
		CALL_IND
		LOG_DAYS_EXP
		LOG_S_VOL
		LOG_MCAP
		/ NOINT;
run;





/* Clustered standard errors: one row per regressor, tagged Type "S", with StdErr exposed as Value. */
data ParameterEstimates_s_clus(keep=parameter value Type);
	set ParameterEstimates_clus(rename=(StdErr=Value));

	/* Discard any row whose name mentions the SYMBOL_ONLY or CHARDATE effects. */
	if find(parameter, "SYMBOL_ONLY") > 0 or find(parameter, "CHARDATE") > 0 then delete;

	Type = "S";
	parameter = UPCASE(parameter);
run;

/* Clustered p-values, keyed by the upper-cased parameter name for the later join. */
data ParameterEstimates_p_clus(keep=parameter probt);
	set ParameterEstimates_clus;

	if find(parameter, "SYMBOL_ONLY") > 0 or find(parameter, "CHARDATE") > 0 then delete;

	parameter = UPCASE(parameter);
run;


/* Point estimates, fit statistics and observation counts for ES_OPTIONSCALED on REG_DATA.
   SYMBOL_ONLY is absorbed and CHARDATE enters as a CLASS effect; the three ODS
   tables are captured as datasets for the post-processing steps that follow. */
ODS OUTPUT ParameterEstimates = ParameterEstimates;
ODS OUTPUT FitStatistics = FitStatistics;
ODS OUTPUT NObs = NObs;
proc glm namelen=64  data=REG_DATA;
	CLASS CHARDATE;
	ABSORB SYMBOL_ONLY;
	/* TICK_CHANGE_IND used to be listed twice; it is listed once here so the
	   model carries no redundant, perfectly collinear regressor. */
	model ES_OPTIONSCALED =
		PFOF_IND
		AUCTION_IND
		AUC_PFOF
		DMMP_IND
		Abs_Delta
		INV_STOCK_MIDPOINT
		Underlying_Vol
		STOCK_QS
		GAMMA
		VEGA
		BUY_IND
		TICK_CHANGE_IND
		CALL_IND
		LOG_DAYS_EXP
		LOG_S_VOL
		LOG_MCAP
		CHARDATE
		/ solution noint;
run;
quit;




/* R-square of the GLM fit, reshaped into a single "Rsq" row of Type "E". */
data FitStatistics (keep=parameter value Type);
	set FitStatistics(rename=(RSquare=value));

	parameter = "Rsq";
	type = "E";
	/* Always true here because parameter was just set to "Rsq"; retained from the original step. */
	if find(parameter, "SYMBOL_ONLY") = 0 and find(parameter, "CHARDATE") = 0;
run;

/* GLM coefficient estimates, tagged Type "E", without the SYMBOL_ONLY / CHARDATE rows. */
data ParameterEstimates_e (keep=parameter value Type);
	set ParameterEstimates(rename=(Estimate=value));

	if find(parameter, "SYMBOL_ONLY") > 0 or find(parameter, "CHARDATE") > 0 then delete;

	Type = "E";
	parameter = UPCASE(parameter);
run;

/* Keep only the "Number of Observations Read" row and expose its count as value. */
data nobs(keep=value type parameter);
	set nobs;
	rename NObsRead = value;

	if label1 = "Number of Observations Read";
	type = "E";
	parameter = "NObs";
run;



/* Stack estimates (Type E), clustered standard errors (Type S), R-square and N
   for the ES_OPTIONSCALED model and give each parameter its display rank n. */
data Regression_ES;
	set ParameterEstimates_e parameterEstimates_s_clus FITSTATISTICS nobs;

	select (parameter);
		when ("AUCTION_IND")         n = 1;
		when ("PFOF_IND")            n = 2;
		when ("AUC_PFOF")            n = 3;
		when ("DMMP_IND")            n = 4;
		when ("ABS_DELTA")           n = 5;
		when ("GAMMA")               n = 6;
		when ("VEGA")                n = 7;
		when ("UNDERLYING_VOL")      n = 8;
		when ("STOCK_QS")            n = 9;
		when ("INV_OPTION_MIDPOINT") n = 10;
		when ("INV_STOCK_MIDPOINT")  n = 11;
		when ("LOG_MCAP")            n = 12;
		when ("LOG_S_VOL")           n = 13;
		when ("CALL_IND")            n = 14;
		when ("LOG_DAYS_EXP")        n = 15;
		when ("TICK_CHANGE_IND")     n = 16;
		when ("BUY_IND")             n = 17;
		when ("Rsq")                 n = 18;
		when ("NObs")                n = 19;
		otherwise;  /* any other parameter keeps n missing */
	end;
run;

/* Attach the clustered p-value to every row that shares the parameter name. */
proc sql;
	create table Regression_ES as
	select est.*, pv.Probt
	from Regression_ES as est
	left join ParameterEstimates_p_clus as pv
		on est.parameter = pv.parameter;
quit;













/* Clustered standard errors and p-values for the PIMP_P model.
   NOTE(review): the sort targets REG_DATA while the regression reads REG_DATA_D2;
   presumably the sort serves the later PROC GLM ABSORB step — confirm. */
PROC SORT DATA=REG_DATA; BY SYMBOL_ONLY; RUN;
ODS OUTPUT ParameterEstimates = ParameterEstimates_clus (keep=parameter StdErr probt);
proc surveyreg data=REG_DATA_D2;
	cluster SYMBOL_ONLY CHARDATE;
	/* TICK_CHANGE_IND used to be listed twice; it is listed once here so the
	   model carries no redundant, perfectly collinear regressor. */
	model PIMP_P =
		PFOF_IND
		AUCTION_IND
		AUC_PFOF
		DMMP_IND
		Abs_Delta
		INV_STOCK_MIDPOINT
		Underlying_Vol
		STOCK_QS
		GAMMA
		VEGA
		BUY_IND
		TICK_CHANGE_IND
		CALL_IND
		LOG_DAYS_EXP
		LOG_S_VOL
		LOG_MCAP
		/ NOINT;
run;





/* Clustered standard errors: one row per regressor, tagged Type "S", with StdErr exposed as Value. */
data ParameterEstimates_s_clus(keep=parameter value Type);
	set ParameterEstimates_clus(rename=(StdErr=Value));

	/* Discard any row whose name mentions the SYMBOL_ONLY or CHARDATE effects. */
	if find(parameter, "SYMBOL_ONLY") > 0 or find(parameter, "CHARDATE") > 0 then delete;

	Type = "S";
	parameter = UPCASE(parameter);
run;

/* Clustered p-values, keyed by the upper-cased parameter name for the later join. */
data ParameterEstimates_p_clus(keep=parameter probt);
	set ParameterEstimates_clus;

	if find(parameter, "SYMBOL_ONLY") > 0 or find(parameter, "CHARDATE") > 0 then delete;

	parameter = UPCASE(parameter);
run;


/* Point estimates, fit statistics and observation counts for PIMP_P on REG_DATA.
   SYMBOL_ONLY is absorbed and CHARDATE enters as a CLASS effect; the three ODS
   tables are captured as datasets for the post-processing steps that follow. */
ODS OUTPUT ParameterEstimates = ParameterEstimates;
ODS OUTPUT FitStatistics = FitStatistics;
ODS OUTPUT NObs = NObs;
proc glm namelen=64  data=REG_DATA;
	CLASS CHARDATE;
	ABSORB SYMBOL_ONLY;
	/* TICK_CHANGE_IND used to be listed twice; it is listed once here so the
	   model carries no redundant, perfectly collinear regressor. */
	model PIMP_P =
		PFOF_IND
		AUCTION_IND
		AUC_PFOF
		DMMP_IND
		Abs_Delta
		INV_STOCK_MIDPOINT
		Underlying_Vol
		STOCK_QS
		GAMMA
		VEGA
		BUY_IND
		TICK_CHANGE_IND
		CALL_IND
		LOG_DAYS_EXP
		LOG_S_VOL
		LOG_MCAP
		CHARDATE
		/ solution noint;
run;
quit;




/* R-square of the GLM fit, reshaped into a single "Rsq" row of Type "E". */
data FitStatistics (keep=parameter value Type);
	set FitStatistics(rename=(RSquare=value));

	parameter = "Rsq";
	type = "E";
	/* Always true here because parameter was just set to "Rsq"; retained from the original step. */
	if find(parameter, "SYMBOL_ONLY") = 0 and find(parameter, "CHARDATE") = 0;
run;

/* GLM coefficient estimates, tagged Type "E"; rows whose name contains any of
   the listed effect names are discarded before the name is upper-cased. */
data ParameterEstimates_e (keep=parameter value Type);
	set ParameterEstimates(rename=(Estimate=value));

	if find(parameter, "SYMBOL_ONLY") > 0
	   or find(parameter, "CHARDATE") > 0
	   or find(parameter, "sender") > 0
	   or find(parameter, "SENDER") > 0
	   or find(parameter, "PFOF_Model") > 0
	   or find(parameter, "DIRECTED_OF") > 0
	then delete;

	Type = "E";
	parameter = UPCASE(parameter);
run;

/* Keep only the "Number of Observations Read" row and expose its count as value. */
data nobs(keep=value type parameter);
	set nobs;
	rename NObsRead = value;

	if label1 = "Number of Observations Read";
	type = "E";
	parameter = "NObs";
run;



/* Stack estimates (Type E), clustered standard errors (Type S), R-square and N
   for the PIMP_P model and give each parameter its display rank n. */
data Regression_PIMP_C;
	set ParameterEstimates_e parameterEstimates_s_clus FITSTATISTICS nobs;

	select (parameter);
		when ("AUCTION_IND")         n = 1;
		when ("PFOF_IND")            n = 2;
		when ("AUC_PFOF")            n = 3;
		when ("DMMP_IND")            n = 4;
		when ("ABS_DELTA")           n = 5;
		when ("GAMMA")               n = 6;
		when ("VEGA")                n = 7;
		when ("UNDERLYING_VOL")      n = 8;
		when ("STOCK_QS")            n = 9;
		when ("INV_OPTION_MIDPOINT") n = 10;
		when ("INV_STOCK_MIDPOINT")  n = 11;
		when ("LOG_MCAP")            n = 12;
		when ("LOG_S_VOL")           n = 13;
		when ("CALL_IND")            n = 14;
		when ("LOG_DAYS_EXP")        n = 15;
		when ("TICK_CHANGE_IND")     n = 16;
		when ("BUY_IND")             n = 17;
		when ("Rsq")                 n = 18;
		when ("NObs")                n = 19;
		otherwise;  /* any other parameter keeps n missing */
	end;
run;

/* Attach the clustered p-value to every row that shares the parameter name. */
proc sql;
	create table Regression_PIMP_C as
	select est.*, pv.Probt
	from Regression_PIMP_C as est
	left join ParameterEstimates_p_clus as pv
		on est.parameter = pv.parameter;
quit;







/* Clustered standard errors and p-values for the EQ model.
   NOTE(review): the sort targets REG_DATA while the regression reads REG_DATA_D2;
   presumably the sort serves the later PROC GLM ABSORB step — confirm. */
PROC SORT DATA=REG_DATA; BY SYMBOL_ONLY; RUN;
ODS OUTPUT ParameterEstimates = ParameterEstimates_clus (keep=parameter StdErr probt);
proc surveyreg data=REG_DATA_D2;
	cluster SYMBOL_ONLY CHARDATE;
	/* TICK_CHANGE_IND used to be listed twice; it is listed once here so the
	   model carries no redundant, perfectly collinear regressor. */
	model EQ =
		PFOF_IND
		AUCTION_IND
		AUC_PFOF
		DMMP_IND
		Abs_Delta
		INV_STOCK_MIDPOINT
		Underlying_Vol
		STOCK_QS
		GAMMA
		VEGA
		BUY_IND
		TICK_CHANGE_IND
		INV_OPTION_MIDPOINT
		CALL_IND
		LOG_DAYS_EXP
		LOG_S_VOL
		LOG_MCAP
		/ NOINT;
run;





/* Clustered standard errors: one row per regressor, tagged Type "S", with StdErr exposed as Value. */
data ParameterEstimates_s_clus(keep=parameter value Type);
	set ParameterEstimates_clus(rename=(StdErr=Value));

	/* Discard any row whose name mentions the SYMBOL_ONLY or CHARDATE effects. */
	if find(parameter, "SYMBOL_ONLY") > 0 or find(parameter, "CHARDATE") > 0 then delete;

	Type = "S";
	parameter = UPCASE(parameter);
run;

/* Clustered p-values, keyed by the upper-cased parameter name for the later join. */
data ParameterEstimates_p_clus(keep=parameter probt);
	set ParameterEstimates_clus;

	if find(parameter, "SYMBOL_ONLY") > 0 or find(parameter, "CHARDATE") > 0 then delete;

	parameter = UPCASE(parameter);
run;


/* Point estimates, fit statistics and observation counts for EQ on REG_DATA.
   SYMBOL_ONLY is absorbed and CHARDATE enters as a CLASS effect; the three ODS
   tables are captured as datasets for the post-processing steps that follow. */
ODS OUTPUT ParameterEstimates = ParameterEstimates;
ODS OUTPUT FitStatistics = FitStatistics;
ODS OUTPUT NObs = NObs;
proc glm namelen=64  data=REG_DATA;
	CLASS CHARDATE;
	ABSORB SYMBOL_ONLY;
	/* TICK_CHANGE_IND used to be listed twice; it is listed once here so the
	   model carries no redundant, perfectly collinear regressor. */
	model EQ =
		PFOF_IND
		AUCTION_IND
		AUC_PFOF
		DMMP_IND
		Abs_Delta
		INV_STOCK_MIDPOINT
		Underlying_Vol
		STOCK_QS
		GAMMA
		VEGA
		BUY_IND
		TICK_CHANGE_IND
		INV_OPTION_MIDPOINT
		CALL_IND
		LOG_DAYS_EXP
		LOG_S_VOL
		LOG_MCAP
		CHARDATE
		/ solution noint;
run;
quit;



/* R-square of the GLM fit, reshaped into a single "Rsq" row of Type "E". */
data FitStatistics (keep=parameter value Type);
	set FitStatistics(rename=(RSquare=value));

	parameter = "Rsq";
	type = "E";
	/* Always true here because parameter was just set to "Rsq"; retained from the original step. */
	if find(parameter, "SYMBOL_ONLY") = 0 and find(parameter, "CHARDATE") = 0;
run;

/* GLM coefficient estimates, tagged Type "E"; rows whose name contains any of
   the listed effect names are discarded before the name is upper-cased. */
data ParameterEstimates_e (keep=parameter value Type);
	set ParameterEstimates(rename=(Estimate=value));

	if find(parameter, "SYMBOL_ONLY") > 0
	   or find(parameter, "CHARDATE") > 0
	   or find(parameter, "sender") > 0
	   or find(parameter, "SENDER") > 0
	   or find(parameter, "PFOF_Model") > 0
	   or find(parameter, "DIRECTED_OF") > 0
	then delete;

	Type = "E";
	parameter = UPCASE(parameter);
run;

/* Keep only the "Number of Observations Read" row and expose its count as value. */
data nobs(keep=value type parameter);
	set nobs;
	rename NObsRead = value;

	if label1 = "Number of Observations Read";
	type = "E";
	parameter = "NObs";
run;



/* Stack estimates (Type E), clustered standard errors (Type S), R-square and N
   for the EQ model and give each parameter its display rank n. */
data Regression_EQ;
	set ParameterEstimates_e parameterEstimates_s_clus FITSTATISTICS nobs;

	select (parameter);
		when ("AUCTION_IND")         n = 1;
		when ("PFOF_IND")            n = 2;
		when ("AUC_PFOF")            n = 3;
		when ("DMMP_IND")            n = 4;
		when ("ABS_DELTA")           n = 5;
		when ("GAMMA")               n = 6;
		when ("VEGA")                n = 7;
		when ("UNDERLYING_VOL")      n = 8;
		when ("STOCK_QS")            n = 9;
		when ("INV_OPTION_MIDPOINT") n = 10;
		when ("INV_STOCK_MIDPOINT")  n = 11;
		when ("LOG_MCAP")            n = 12;
		when ("LOG_S_VOL")           n = 13;
		when ("CALL_IND")            n = 14;
		when ("LOG_DAYS_EXP")        n = 15;
		when ("TICK_CHANGE_IND")     n = 16;
		when ("BUY_IND")             n = 17;
		when ("Rsq")                 n = 18;
		when ("NObs")                n = 19;
		otherwise;  /* any other parameter keeps n missing */
	end;
run;

/* Attach the clustered p-value to every row that shares the parameter name. */
proc sql;
	create table Regression_EQ as
	select est.*, pv.Probt
	from Regression_EQ as est
	left join ParameterEstimates_p_clus as pv
		on est.parameter = pv.parameter;
quit;







/* Put the five per-model result tables side by side. REGRESSION_QS drives the
   row set; every other table is left-joined on parameter name and row type, and
   each table's columns get a suffix naming its model. */
PROC SQL;
	CREATE TABLE Regression_Combined AS
	SELECT
		t_qs.VALUE   AS VALUE_QS,    t_qs.PROBT   AS PROBT_QS,    t_qs.N   AS N_QS,
		t_qs.PARAMETER   AS PARAMETER_QS,    t_qs.TYPE   AS TYPE_QS,
		t_es.VALUE   AS VALUE_ES,    t_es.PROBT   AS PROBT_ES,    t_es.N   AS N_ES,
		t_es.PARAMETER   AS PARAMETER_ES,    t_es.TYPE   AS TYPE_ES,
		t_pimp.VALUE AS VALUE_PIMP,  t_pimp.PROBT AS PROBT_PIMP,  t_pimp.N AS N_PIMP,
		t_pimp.PARAMETER AS PARAMETER_PIMP,  t_pimp.TYPE AS TYPE_PIMP,
		t_eq.VALUE   AS VALUE_EQ,    t_eq.PROBT   AS PROBT_EQ,    t_eq.N   AS N_EQ,
		t_eq.PARAMETER   AS PARAMETER_EQ,    t_eq.TYPE   AS TYPE_EQ,
		t_mr.VALUE   AS VALUE_MMREV, t_mr.PROBT   AS PROBT_MMREV, t_mr.N   AS N_MMREV,
		t_mr.PARAMETER   AS PARAMETER_MMREV, t_mr.TYPE   AS TYPE_MMREV
	FROM REGRESSION_QS AS t_qs
	LEFT JOIN REGRESSION_ES AS t_es
		ON t_qs.PARAMETER = t_es.PARAMETER AND t_qs.TYPE = t_es.TYPE
	LEFT JOIN REGRESSION_PIMP_C AS t_pimp
		ON t_qs.PARAMETER = t_pimp.PARAMETER AND t_qs.TYPE = t_pimp.TYPE
	LEFT JOIN REGRESSION_EQ AS t_eq
		ON t_qs.PARAMETER = t_eq.PARAMETER AND t_qs.TYPE = t_eq.TYPE
	LEFT JOIN REGRESSION_MR AS t_mr
		ON t_qs.PARAMETER = t_mr.PARAMETER AND t_qs.TYPE = t_mr.TYPE
	;
QUIT;



/* Turn the combined regression table into display strings.
   For every row this step sets
     VARIABLE         - the printed row label (blank on standard-error rows),
     char_2dash       - the literal "\\",
     VALUE_CHAR_<m>   - the formatted cell for model <m> in QS ES PIMP EQ MMREV.
   Estimate rows (Type "E") get a significance suffix ("\threeS", "\twoS", "\oneS";
   presumably LaTeX macros defined by the consuming document — confirm) chosen from
   the p-value; standard-error rows (Type "S") are wrapped in parentheses; the
   "Rsq" and "NObs" rows are written as plain numbers.
   Changes relative to the earlier version of this step:
     - a p-value of exactly 0.1 now yields the unstarred estimate (previously no
       branch matched and the cell stayed blank);
     - a missing p-value or missing estimate no longer earns stars (a missing
       value compares as smaller than 0.01);
     - the misspelled "PTICK_CHANGE_IND" test and two duplicated AUC_PFOF
       statements are gone;
     - all VALUE_CHAR_ columns share length 48 (VALUE_CHAR_ES used to be 28). */
data Regression_Combined;
	SET Regression_Combined;

	/* Declared right after SET so the new columns keep their original order. */
	length VARIABLE $48 char_2dash $12
	       VALUE_CHAR_QS VALUE_CHAR_ES VALUE_CHAR_PIMP VALUE_CHAR_EQ VALUE_CHAR_MMREV $48;

	/* Parallel views over the five models; element k of each array belongs to the same model. */
	array _val{5} VALUE_QS      VALUE_ES      VALUE_PIMP      VALUE_EQ      VALUE_MMREV;
	array _pv{5}  PROBT_QS      PROBT_ES      PROBT_PIMP      PROBT_EQ      PROBT_MMREV;
	array _typ{5} TYPE_QS       TYPE_ES       TYPE_PIMP       TYPE_EQ       TYPE_MMREV;
	array _txt{5} VALUE_CHAR_QS VALUE_CHAR_ES VALUE_CHAR_PIMP VALUE_CHAR_EQ VALUE_CHAR_MMREV;
	drop _k;

	/* Sort-key tweaks retained from the original step. */
	if PARAMETER_QS="AUCTION_IND" THEN N_QS=0.9;
	if PARAMETER_QS="AUC_PFOF" THEN N_QS=2.9;

	/* Row labels: only estimate rows carry text. */
	if TYPE_QS = "E" then do;
		select (PARAMETER_QS);
			when ("PFOF_IND")            VARIABLE = "PFOF";
			when ("AUCTION_IND")         VARIABLE = "Auction";
			when ("QUOTEDSPREAD_C")      VARIABLE = "Option Quoted Spread";
			when ("AUC_PFOF")            VARIABLE = "Auction $\times$ PFOF";
			when ("DMMP_IND")            VARIABLE = "DMMP";
			when ("VEGA")                VARIABLE = "Vega";
			when ("ABS_DELTA")           VARIABLE = "$\vert$Delta$\vert$";
			when ("GAMMA")               VARIABLE = "Gamma";
			when ("TICK_CHANGE_IND")     VARIABLE = "Tick Size";
			when ("INV_OPTION_MIDPOINT") VARIABLE = "1/Option Midpoint";
			when ("INV_STOCK_MIDPOINT")  VARIABLE = "1/Underlying Midpoint";
			when ("CALL_IND")            VARIABLE = "Call";
			when ("STOCK_QS")            VARIABLE = "Underlying Quoted Spr.";
			when ("LOG_S_VOL")           VARIABLE = "Trade size";
			when ("BUY_IND")             VARIABLE = "Buy";
			when ("ARBITRAGE")           VARIABLE = "Arbitrage$_{t-1}$";
			when ("LOG_DAYS_EXP")        VARIABLE = "Days-to-Expiry";
			when ("UNDERLYING_VOL")      VARIABLE = "Underlying Volatility$_{t-1}$";
			when ("LOG_MCAP")            VARIABLE = "Underlying MCAP";
			when ("LOG_SIZE_VOL")        VARIABLE = "Trade size";
			when ("Rsq")                 VARIABLE = "Rsq";
			otherwise;  /* unlisted parameters keep a blank label */
		end;
	end;
	IF PARAMETER_QS = "NObs" THEN VARIABLE = "NObs";
	/* Standard-error rows never show a label. */
	if TYPE_QS = "S" then VARIABLE = " ";

	char_2dash = "\\";

	/* Estimate cells: append the significance suffix that matches the p-value. */
	if VARIABLE not in ("Rsq","NObs") then do _k = 1 to 5;
		if _typ{_k} = "E" & NOT MISSING(_val{_k}) then do;
			if missing(_pv{_k})     then _txt{_k} = PUT(_val{_k},8.3);
			else if _pv{_k} < 0.01  then _txt{_k} = CATS(PUT(_val{_k},8.3),"\threeS");
			else if _pv{_k} < 0.05  then _txt{_k} = CATS(PUT(_val{_k},8.3),"\twoS");
			else if _pv{_k} < 0.1   then _txt{_k} = CATS(PUT(_val{_k},8.3),"\oneS");
			else                         _txt{_k} = PUT(_val{_k},8.3);
		end;
	end;

	/* Standard-error cells: put the value in brackets. */
	if TYPE_QS = "S" then do _k = 1 to 5;
		IF NOT MISSING(_val{_k}) THEN _txt{_k} = CATS("(",PUT(_val{_k},8.3),")");
	end;

	/* Summary rows: plain numbers for every model, including missing ones. */
	if VARIABLE in ("Rsq","NObs") then do _k = 1 to 5;
		_txt{_k} = PUT(_val{_k},8.3);
	end;

RUN;


/* Order rows by display rank; within a parameter the "E" row sorts before the "S" row. */
proc sort data=Regression_Combined;
	by n_QS TYPE_QS;
run;

/* Switch ODS output back on for the printed table only. */
ODS SELECT ALL;
ODS RESULTS;
ODS GRAPHICS OFF;
ODS TITLE "Appendix TABLE A4 - PANEL A";
/*=========================================*/
PROC REPORT data=Regression_Combined;
	/* NOTE(review): value_char_EQ exists in the input but is not listed here — confirm that is intended. */
	columns variable value_char_QS value_char_ES value_char_PIMP value_char_MMREV;

	define variable         / display format=$168. "Variable";
	define value_char_QS    / display format=$168. "Quoted Spread";
	define value_char_ES    / display format=$168. "Effective Spread";
	define value_char_PIMP  / display format=$168. "Price Improvement";
	define value_char_MMREV / display format=$168. "Market Maker Revenues";
run;

/* Silence ODS output again for the steps that follow. */
ODS GRAPHICS OFF;
ODS NORESULTS;
ODS SELECT NONE;
ODS TITLE " ";
/*=========================================*/











/* Clustered standard errors and p-values for the MMREV_P model on the matched sample.
   NOTE(review): the sort targets REG_DATA_MATCHED while the regression reads
   REG_DATA_MATCHED_D2; presumably the sort serves the later PROC GLM ABSORB step — confirm. */
PROC SORT DATA=REG_DATA_MATCHED; BY SYMBOL_ONLY; RUN;
ODS OUTPUT ParameterEstimates = ParameterEstimates_clus (keep=parameter StdErr probt);
proc surveyreg data=REG_DATA_MATCHED_D2;
	cluster SYMBOL_ONLY CHARDATE;
	/* TICK_CHANGE_IND used to be listed twice; it is listed once here so the
	   model carries no redundant, perfectly collinear regressor. */
	model MMREV_P =
		PFOF_IND
		AUCTION_IND
		AUC_PFOF
		DMMP_IND
		Abs_Delta
		INV_STOCK_MIDPOINT
		GAMMA
		VEGA
		BUY_IND
		TICK_CHANGE_IND
		CALL_IND
		LOG_DAYS_EXP
		LOG_S_VOL
		LOG_MCAP
		/ NOINT;
run;







/* Clustered standard errors: one row per regressor, tagged Type "S", with StdErr exposed as Value. */
data ParameterEstimates_s_clus(keep=parameter value Type);
	set ParameterEstimates_clus(rename=(StdErr=Value));

	/* Discard any row whose name mentions the SYMBOL_ONLY or CHARDATE effects. */
	if find(parameter, "SYMBOL_ONLY") > 0 or find(parameter, "CHARDATE") > 0 then delete;

	Type = "S";
	parameter = UPCASE(parameter);
run;

/* Clustered p-values, keyed by the upper-cased parameter name for the later join. */
data ParameterEstimates_p_clus(keep=parameter probt);
	set ParameterEstimates_clus;

	if find(parameter, "SYMBOL_ONLY") > 0 or find(parameter, "CHARDATE") > 0 then delete;

	parameter = UPCASE(parameter);
run;




/* Point estimates, fit statistics and observation counts for MMREV_P on REG_DATA_MATCHED.
   SYMBOL_ONLY is absorbed and CHARDATE enters as a CLASS effect; the three ODS
   tables are captured as datasets for the post-processing steps that follow. */
ODS OUTPUT ParameterEstimates = ParameterEstimates;
ODS OUTPUT FitStatistics = FitStatistics;
ODS OUTPUT NObs = NObs;
proc glm namelen=64  data=REG_DATA_MATCHED;
	CLASS CHARDATE;
	ABSORB SYMBOL_ONLY;
	/* TICK_CHANGE_IND used to be listed twice; it is listed once here so the
	   model carries no redundant, perfectly collinear regressor. */
	model MMREV_P =
		PFOF_IND
		AUCTION_IND
		AUC_PFOF
		DMMP_IND
		Abs_Delta
		INV_STOCK_MIDPOINT
		GAMMA
		VEGA
		BUY_IND
		TICK_CHANGE_IND
		CALL_IND
		LOG_DAYS_EXP
		LOG_S_VOL
		LOG_MCAP
		CHARDATE
		/ solution noint;
run;
quit;




/* R-square of the GLM fit, reshaped into a single "Rsq" row of Type "E". */
data FitStatistics (keep=parameter value Type);
	set FitStatistics(rename=(RSquare=value));

	parameter = "Rsq";
	type = "E";
	/* Always true here because parameter was just set to "Rsq"; retained from the original step. */
	if find(parameter, "SYMBOL_ONLY") = 0 and find(parameter, "CHARDATE") = 0;
run;

/* GLM coefficient estimates, tagged Type "E", without the SYMBOL_ONLY / CHARDATE rows. */
data ParameterEstimates_e (keep=parameter value Type);
	set ParameterEstimates(rename=(Estimate=value));

	if find(parameter, "SYMBOL_ONLY") > 0 or find(parameter, "CHARDATE") > 0 then delete;

	Type = "E";
	parameter = UPCASE(parameter);
run;

/* Keep only the "Number of Observations Read" row and expose its count as value. */
data nobs(keep=value type parameter);
	set nobs;
	rename NObsRead = value;

	if label1 = "Number of Observations Read";
	type = "E";
	parameter = "NObs";
run;



/* Stack estimates (Type E), clustered standard errors (Type S), R-square and N
   for the matched-sample MMREV_P model and give each parameter its display rank n. */
data Regression_MR;
	set ParameterEstimates_e parameterEstimates_s_clus FITSTATISTICS nobs;

	select (parameter);
		when ("AUCTION_IND")         n = 1;
		when ("PFOF_IND")            n = 2;
		when ("AUC_PFOF")            n = 3;
		when ("DMMP_IND")            n = 4;
		when ("ABS_DELTA")           n = 5;
		when ("GAMMA")               n = 6;
		when ("VEGA")                n = 7;
		when ("UNDERLYING_VOL")      n = 8;
		when ("STOCK_QS")            n = 9;
		when ("INV_OPTION_MIDPOINT") n = 10;
		when ("INV_STOCK_MIDPOINT")  n = 11;
		when ("LOG_MCAP")            n = 12;
		when ("LOG_S_VOL")           n = 13;
		when ("CALL_IND")            n = 14;
		when ("LOG_DAYS_EXP")        n = 15;
		when ("TICK_CHANGE_IND")     n = 16;
		when ("BUY_IND")             n = 17;
		when ("Rsq")                 n = 18;
		when ("NObs")                n = 19;
		otherwise;  /* any other parameter keeps n missing */
	end;
run;

/* Attach the clustered p-value to every row that shares the parameter name. */
proc sql;
	create table Regression_MR as
	select est.*, pv.Probt
	from Regression_MR as est
	left join ParameterEstimates_p_clus as pv
		on est.parameter = pv.parameter;
quit;







/* Clustered standard errors and p-values for the QS_OPTIONSCALED model on the matched sample.
   NOTE(review): the sort targets REG_DATA_MATCHED while the regression reads
   REG_DATA_MATCHED_D2; presumably the sort serves the later PROC GLM ABSORB step — confirm. */
PROC SORT DATA=REG_DATA_MATCHED; BY SYMBOL_ONLY; RUN;
ODS OUTPUT ParameterEstimates = ParameterEstimates_clus (keep=parameter StdErr probt);
proc surveyreg data=REG_DATA_MATCHED_D2;
	cluster SYMBOL_ONLY CHARDATE;
	/* TICK_CHANGE_IND used to be listed twice; it is listed once here so the
	   model carries no redundant, perfectly collinear regressor. */
	model QS_OPTIONSCALED =
		PFOF_IND
		AUCTION_IND
		AUC_PFOF
		DMMP_IND
		Abs_Delta
		INV_STOCK_MIDPOINT
		Underlying_Vol
		STOCK_QS
		GAMMA
		VEGA
		BUY_IND
		TICK_CHANGE_IND
		CALL_IND
		LOG_DAYS_EXP
		LOG_S_VOL
		LOG_MCAP
		/ NOINT;
run;






/* Clustered standard errors: one row per regressor, tagged Type "S", with StdErr exposed as Value. */
data ParameterEstimates_s_clus(keep=parameter value Type);
	set ParameterEstimates_clus(rename=(StdErr=Value));

	/* Discard any row whose name mentions the SYMBOL_ONLY or CHARDATE effects. */
	if find(parameter, "SYMBOL_ONLY") > 0 or find(parameter, "CHARDATE") > 0 then delete;

	Type = "S";
	parameter = UPCASE(parameter);
run;

/* Clustered p-values, keyed by the upper-cased parameter name for the later join. */
data ParameterEstimates_p_clus(keep=parameter probt);
	set ParameterEstimates_clus;

	if find(parameter, "SYMBOL_ONLY") > 0 or find(parameter, "CHARDATE") > 0 then delete;

	parameter = UPCASE(parameter);
run;




/* Point estimates, fit statistics and observation counts for QS_OPTIONSCALED on REG_DATA_MATCHED.
   The data are sorted by SYMBOL_ONLY first because that variable is absorbed below;
   CHARDATE enters as a CLASS effect. */
PROC SORT DATA=REG_DATA_MATCHED; BY SYMBOL_ONLY; RUN;
ODS OUTPUT ParameterEstimates = ParameterEstimates;
ODS OUTPUT FitStatistics = FitStatistics;
ODS OUTPUT NObs = NObs;
proc glm namelen=64  data=REG_DATA_MATCHED;
	CLASS CHARDATE;
	ABSORB SYMBOL_ONLY;
	/* TICK_CHANGE_IND used to be listed twice; it is listed once here so the
	   model carries no redundant, perfectly collinear regressor. */
	model QS_OPTIONSCALED =
		PFOF_IND
		AUCTION_IND
		AUC_PFOF
		DMMP_IND
		Abs_Delta
		INV_STOCK_MIDPOINT
		Underlying_Vol
		STOCK_QS
		GAMMA
		VEGA
		BUY_IND
		TICK_CHANGE_IND
		CALL_IND
		LOG_DAYS_EXP
		LOG_S_VOL
		LOG_MCAP
		CHARDATE
		/ solution noint;
run;
quit;




/* R-square of the GLM fit, reshaped into a single "Rsq" row of Type "E". */
data FitStatistics (keep=parameter value Type);
	set FitStatistics(rename=(RSquare=value));

	parameter = "Rsq";
	type = "E";
	/* Always true here because parameter was just set to "Rsq"; retained from the original step. */
	if find(parameter, "SYMBOL_ONLY") = 0 and find(parameter, "CHARDATE") = 0;
run;

/* GLM coefficient estimates, tagged Type "E", without the SYMBOL_ONLY / CHARDATE rows. */
data ParameterEstimates_e (keep=parameter value Type);
	set ParameterEstimates(rename=(Estimate=value));

	if find(parameter, "SYMBOL_ONLY") > 0 or find(parameter, "CHARDATE") > 0 then delete;

	Type = "E";
	parameter = UPCASE(parameter);
run;

/* Keep only the "Number of Observations Read" row and expose its count as value. */
data nobs(keep=value type parameter);
	set nobs;
	rename NObsRead = value;

	type = "E";
	if label1 = "Number of Observations Read";
	parameter = "NObs";
run;



/* Stack estimates (Type E), clustered standard errors (Type S), R-square and N
   for the matched-sample QS_OPTIONSCALED model and give each parameter its display rank n. */
data Regression_QS;
	set ParameterEstimates_e parameterEstimates_s_clus FITSTATISTICS nobs;

	select (parameter);
		when ("AUCTION_IND")         n = 1;
		when ("PFOF_IND")            n = 2;
		when ("AUC_PFOF")            n = 3;
		when ("DMMP_IND")            n = 4;
		when ("ABS_DELTA")           n = 5;
		when ("GAMMA")               n = 6;
		when ("VEGA")                n = 7;
		when ("UNDERLYING_VOL")      n = 8;
		when ("STOCK_QS")            n = 9;
		when ("INV_OPTION_MIDPOINT") n = 10;
		when ("INV_STOCK_MIDPOINT")  n = 11;
		when ("LOG_MCAP")            n = 12;
		when ("LOG_S_VOL")           n = 13;
		when ("CALL_IND")            n = 14;
		when ("LOG_DAYS_EXP")        n = 15;
		when ("TICK_CHANGE_IND")     n = 16;
		when ("BUY_IND")             n = 17;
		when ("Rsq")                 n = 18;
		when ("NObs")                n = 19;
		otherwise;  /* any other parameter keeps n missing */
	end;
run;

/* Attach the clustered p-value to every row that shares the parameter name. */
proc sql;
	create table Regression_QS as
	select est.*, pv.Probt
	from Regression_QS as est
	left join ParameterEstimates_p_clus as pv
		on est.parameter = pv.parameter;
quit;






/* Clustered standard errors and p-values for the ES_OPTIONSCALED model on the matched sample.
   NOTE(review): the sort targets REG_DATA_MATCHED while the regression reads
   REG_DATA_MATCHED_D2; presumably the sort serves the later PROC GLM ABSORB step — confirm. */
PROC SORT DATA=REG_DATA_MATCHED; BY SYMBOL_ONLY; RUN;
ODS OUTPUT ParameterEstimates = ParameterEstimates_clus (keep=parameter StdErr probt);
proc surveyreg data=REG_DATA_MATCHED_D2;
	cluster SYMBOL_ONLY CHARDATE;
	/* TICK_CHANGE_IND used to be listed twice; it is listed once here so the
	   model carries no redundant, perfectly collinear regressor. */
	model ES_OPTIONSCALED =
		PFOF_IND
		AUCTION_IND
		AUC_PFOF
		DMMP_IND
		Abs_Delta
		INV_STOCK_MIDPOINT
		Underlying_Vol
		STOCK_QS
		GAMMA
		VEGA
		BUY_IND
		TICK_CHANGE_IND
		CALL_IND
		LOG_DAYS_EXP
		LOG_S_VOL
		LOG_MCAP
		/ NOINT;
run;





/* Clustered standard errors: one row per regressor, tagged Type "S", with StdErr exposed as Value. */
data ParameterEstimates_s_clus(keep=parameter value Type);
	set ParameterEstimates_clus(rename=(StdErr=Value));

	/* Discard any row whose name mentions the SYMBOL_ONLY or CHARDATE effects. */
	if find(parameter, "SYMBOL_ONLY") > 0 or find(parameter, "CHARDATE") > 0 then delete;

	Type = "S";
	parameter = UPCASE(parameter);
run;

/* Clustered p-values, keyed by the upper-cased parameter name for the later join. */
data ParameterEstimates_p_clus(keep=parameter probt);
	set ParameterEstimates_clus;

	if find(parameter, "SYMBOL_ONLY") > 0 or find(parameter, "CHARDATE") > 0 then delete;

	parameter = UPCASE(parameter);
run;


/* Point estimates, fit statistics and observation counts for ES_OPTIONSCALED on REG_DATA_MATCHED.
   SYMBOL_ONLY is absorbed and CHARDATE enters as a CLASS effect; the three ODS
   tables are captured as datasets for the post-processing steps that follow. */
ODS OUTPUT ParameterEstimates = ParameterEstimates;
ODS OUTPUT FitStatistics = FitStatistics;
ODS OUTPUT NObs = NObs;
proc glm namelen=64  data=REG_DATA_MATCHED;
	CLASS CHARDATE;
	ABSORB SYMBOL_ONLY;
	/* TICK_CHANGE_IND used to be listed twice; it is listed once here so the
	   model carries no redundant, perfectly collinear regressor. */
	model ES_OPTIONSCALED =
		PFOF_IND
		AUCTION_IND
		AUC_PFOF
		DMMP_IND
		Abs_Delta
		INV_STOCK_MIDPOINT
		Underlying_Vol
		STOCK_QS
		GAMMA
		VEGA
		BUY_IND
		TICK_CHANGE_IND
		CALL_IND
		LOG_DAYS_EXP
		LOG_S_VOL
		LOG_MCAP
		CHARDATE
		/ solution noint;
run;
quit;




/* R-square of the GLM fit, reshaped into a single "Rsq" row of Type "E". */
data FitStatistics (keep=parameter value Type);
	set FitStatistics(rename=(RSquare=value));

	parameter = "Rsq";
	type = "E";
	/* Always true here because parameter was just set to "Rsq"; retained from the original step. */
	if find(parameter, "SYMBOL_ONLY") = 0 and find(parameter, "CHARDATE") = 0;
run;

/* GLM coefficient estimates, tagged Type "E", without the SYMBOL_ONLY / CHARDATE rows. */
data ParameterEstimates_e (keep=parameter value Type);
	set ParameterEstimates(rename=(Estimate=value));

	if find(parameter, "SYMBOL_ONLY") > 0 or find(parameter, "CHARDATE") > 0 then delete;

	Type = "E";
	parameter = UPCASE(parameter);
run;

/* Keep only the "Number of Observations Read" row and expose its count as value. */
data nobs(keep=value type parameter);
	set nobs;
	rename NObsRead = value;

	if label1 = "Number of Observations Read";
	type = "E";
	parameter = "NObs";
run;



/* Stack estimates, standard errors, R-square and observation count, and
   attach the row-order key n to each recognised parameter. Parameters not
   listed below keep n missing. */
data Regression_ES;
	set ParameterEstimates_e parameterEstimates_s_clus FITSTATISTICS nobs;

	select (parameter);
		when ("AUCTION_IND")         n = 1;
		when ("PFOF_IND")            n = 2;
		when ("AUC_PFOF")            n = 3;
		when ("DMMP_IND")            n = 4;
		when ("ABS_DELTA")           n = 5;
		when ("GAMMA")               n = 6;
		when ("VEGA")                n = 7;
		when ("UNDERLYING_VOL")      n = 8;
		when ("STOCK_QS")            n = 9;
		when ("INV_OPTION_MIDPOINT") n = 10;
		when ("INV_STOCK_MIDPOINT")  n = 11;
		when ("LOG_MCAP")            n = 12;
		when ("LOG_S_VOL")           n = 13;
		when ("CALL_IND")            n = 14;
		when ("LOG_DAYS_EXP")        n = 15;
		when ("TICK_CHANGE_IND")     n = 16;
		when ("BUY_IND")             n = 17;
		when ("Rsq")                 n = 18;
		when ("NObs")                n = 19;
		otherwise;
	end;
run;


/* Attach the p-value (Probt) to every Regression_ES row by parameter name.
   Rows with no match in ParameterEstimates_p_clus keep a missing Probt.
   NOTE(review): the table is both read and replaced by this statement; SAS is
   expected to log a recursive-reference warning here - confirm it is benign. */
proc sql;
	create table Regression_ES as
	select res.*, pv.Probt
	from Regression_ES as res
	left join ParameterEstimates_p_clus as pv
		on res.parameter = pv.parameter;
quit;













/* Order REG_DATA_MATCHED by SYMBOL_ONLY. The SURVEYREG step below reads
   REG_DATA_MATCHED_D2, so this ordering is presumably for the later PROC GLM
   step that absorbs SYMBOL_ONLY - confirm. */
proc sort data=REG_DATA_MATCHED;
	by SYMBOL_ONLY;
run;

/* Regression of PIMP_P with a CLUSTER statement on SYMBOL_ONLY and CHARDATE,
   no intercept. Only parameter, StdErr and probt are kept from the
   ParameterEstimates table. */
ods output ParameterEstimates = ParameterEstimates_clus (keep=parameter StdErr probt);
proc surveyreg data=REG_DATA_MATCHED_D2;
	cluster SYMBOL_ONLY CHARDATE;

	/* Every regressor is listed exactly once: a repeated term is perfectly
	   collinear with its first occurrence and cannot carry its own
	   coefficient. */
	model PIMP_P =
		PFOF_IND
		AUCTION_IND
		AUC_PFOF
		DMMP_IND
		Abs_Delta
		INV_STOCK_MIDPOINT
		Underlying_Vol
		STOCK_QS
		GAMMA
		VEGA
		BUY_IND
		TICK_CHANGE_IND
		CALL_IND
		LOG_DAYS_EXP
		LOG_S_VOL
		LOG_MCAP
		/ NOINT;
quit;






/* Standard-error rows (Type="S"): StdErr from ParameterEstimates_clus is
   exposed as "Value" and the parameter name is upper-cased. */
data ParameterEstimates_s_clus(keep=parameter value Type);
	set ParameterEstimates_clus;

	Type = "S";

	/* Discard every row whose name mentions SYMBOL_ONLY or CHARDATE. */
	if find(parameter, "SYMBOL_ONLY") > 0 or find(parameter, "CHARDATE") > 0 then delete;

	parameter = upcase(parameter);

	rename StdErr = Value;
run;

/* p-value lookup: one (parameter, probt) row per retained coefficient of
   ParameterEstimates_clus, keyed by the upper-cased parameter name. */
data ParameterEstimates_p_clus(keep=parameter probt);
	set ParameterEstimates_clus;

	/* Discard every row whose name mentions SYMBOL_ONLY or CHARDATE. */
	if find(parameter, "SYMBOL_ONLY") > 0 or find(parameter, "CHARDATE") > 0 then delete;

	parameter = upcase(parameter);
run;


/* Regression of PIMP_P with SYMBOL_ONLY absorbed and CHARDATE as a CLASS
   effect, no intercept. Coefficients, fit statistics and observation counts
   are captured as WORK datasets through ODS OUTPUT. */
ods output
	ParameterEstimates = ParameterEstimates
	FitStatistics      = FitStatistics
	NObs               = NObs;
proc glm namelen=64 data=REG_DATA_MATCHED;
	class CHARDATE;
	absorb SYMBOL_ONLY;

	/* Every regressor is listed exactly once: a repeated term is perfectly
	   collinear with its first occurrence and cannot carry its own
	   coefficient. */
	model PIMP_P =
		PFOF_IND
		AUCTION_IND
		AUC_PFOF
		DMMP_IND
		Abs_Delta
		INV_STOCK_MIDPOINT
		Underlying_Vol
		STOCK_QS
		GAMMA
		VEGA
		BUY_IND
		TICK_CHANGE_IND
		CALL_IND
		LOG_DAYS_EXP
		LOG_S_VOL
		LOG_MCAP
		CHARDATE
		/ solution noint;
run;
quit;




/* Reduce the GLM fit statistics to a single table row: RSquare is exposed as
   "value", labelled parameter="Rsq", row type "E". */
data FitStatistics (keep=parameter value Type);
	set FitStatistics;

	parameter = "Rsq";
	type      = "E";

	/* parameter was just set to "Rsq", so this filter lets every row through. */
	if find(parameter, "SYMBOL_ONLY") = 0 and find(parameter, "CHARDATE") = 0;

	rename RSquare = value;
run;





/* Coefficient rows (Type="E"): Estimate is exposed as "value" and the
   parameter name is upper-cased. */
data ParameterEstimates_e (keep=parameter value Type);
	set ParameterEstimates;

	Type = "E";

	/* Discard every row whose name contains one of the excluded substrings
	   (the match is case-sensitive and made before upper-casing). */
	if find(parameter, "SYMBOL_ONLY") > 0
	   or find(parameter, "CHARDATE") > 0
	   or find(parameter, "sender") > 0
	   or find(parameter, "SENDER") > 0
	   or find(parameter, "PFOF_Model") > 0
	   or find(parameter, "DIRECTED_OF") > 0
	then delete;

	parameter = upcase(parameter);

	rename Estimate = value;
run;





/* Keep only the "Number of Observations Read" row and expose NObsRead as
   "value", labelled parameter="NObs", row type "E". */
data nobs(keep=value type parameter);
	set nobs;

	type = "E";

	/* Any row other than the "observations read" line is removed. */
	if label1 ne "Number of Observations Read" then delete;

	parameter = "NObs";

	rename NObsRead = value;
run;



/* Stack estimates, standard errors, R-square and observation count, and
   attach the row-order key n to each recognised parameter. Parameters not
   listed below keep n missing. */
data Regression_PIMP_C;
	set ParameterEstimates_e parameterEstimates_s_clus FITSTATISTICS nobs;

	select (parameter);
		when ("AUCTION_IND")         n = 1;
		when ("PFOF_IND")            n = 2;
		when ("AUC_PFOF")            n = 3;
		when ("DMMP_IND")            n = 4;
		when ("ABS_DELTA")           n = 5;
		when ("GAMMA")               n = 6;
		when ("VEGA")                n = 7;
		when ("UNDERLYING_VOL")      n = 8;
		when ("STOCK_QS")            n = 9;
		when ("INV_OPTION_MIDPOINT") n = 10;
		when ("INV_STOCK_MIDPOINT")  n = 11;
		when ("LOG_MCAP")            n = 12;
		when ("LOG_S_VOL")           n = 13;
		when ("CALL_IND")            n = 14;
		when ("LOG_DAYS_EXP")        n = 15;
		when ("TICK_CHANGE_IND")     n = 16;
		when ("BUY_IND")             n = 17;
		when ("Rsq")                 n = 18;
		when ("NObs")                n = 19;
		otherwise;
	end;
run;


/* Attach the p-value (Probt) to every Regression_PIMP_C row by parameter
   name. Rows with no match in ParameterEstimates_p_clus keep a missing Probt.
   NOTE(review): the table is both read and replaced by this statement; SAS is
   expected to log a recursive-reference warning here - confirm it is benign. */
proc sql;
	create table Regression_PIMP_C as
	select res.*, pv.Probt
	from Regression_PIMP_C as res
	left join ParameterEstimates_p_clus as pv
		on res.parameter = pv.parameter;
quit;







/* Order REG_DATA_MATCHED by SYMBOL_ONLY. The SURVEYREG step below reads
   REG_DATA_MATCHED_D2, so this ordering is presumably for the later PROC GLM
   step that absorbs SYMBOL_ONLY - confirm. */
proc sort data=REG_DATA_MATCHED;
	by SYMBOL_ONLY;
run;

/* Regression of EQ with a CLUSTER statement on SYMBOL_ONLY and CHARDATE,
   no intercept. Only parameter, StdErr and probt are kept from the
   ParameterEstimates table. */
ods output ParameterEstimates = ParameterEstimates_clus (keep=parameter StdErr probt);
proc surveyreg data=REG_DATA_MATCHED_D2;
	cluster SYMBOL_ONLY CHARDATE;

	/* Every regressor is listed exactly once: a repeated term is perfectly
	   collinear with its first occurrence and cannot carry its own
	   coefficient. */
	model EQ =
		PFOF_IND
		AUCTION_IND
		AUC_PFOF
		DMMP_IND
		Abs_Delta
		INV_STOCK_MIDPOINT
		Underlying_Vol
		STOCK_QS
		GAMMA
		VEGA
		BUY_IND
		TICK_CHANGE_IND
		INV_OPTION_MIDPOINT
		CALL_IND
		LOG_DAYS_EXP
		LOG_S_VOL
		LOG_MCAP
		/ NOINT;
quit;





/* Standard-error rows (Type="S"): StdErr from ParameterEstimates_clus is
   exposed as "Value" and the parameter name is upper-cased. */
data ParameterEstimates_s_clus(keep=parameter value Type);
	set ParameterEstimates_clus;

	Type = "S";

	/* Discard every row whose name mentions SYMBOL_ONLY or CHARDATE. */
	if find(parameter, "SYMBOL_ONLY") > 0 or find(parameter, "CHARDATE") > 0 then delete;

	parameter = upcase(parameter);

	rename StdErr = Value;
run;

/* p-value lookup: one (parameter, probt) row per retained coefficient of
   ParameterEstimates_clus, keyed by the upper-cased parameter name. */
data ParameterEstimates_p_clus(keep=parameter probt);
	set ParameterEstimates_clus;

	/* Discard every row whose name mentions SYMBOL_ONLY or CHARDATE. */
	if find(parameter, "SYMBOL_ONLY") > 0 or find(parameter, "CHARDATE") > 0 then delete;

	parameter = upcase(parameter);
run;


/* Regression of EQ with SYMBOL_ONLY absorbed and CHARDATE as a CLASS effect,
   no intercept. Coefficients, fit statistics and observation counts are
   captured as WORK datasets through ODS OUTPUT. */
ods output
	ParameterEstimates = ParameterEstimates
	FitStatistics      = FitStatistics
	NObs               = NObs;
proc glm namelen=64 data=REG_DATA_MATCHED;
	class CHARDATE;
	absorb SYMBOL_ONLY;

	/* Every regressor is listed exactly once: a repeated term is perfectly
	   collinear with its first occurrence and cannot carry its own
	   coefficient. */
	model EQ =
		PFOF_IND
		AUCTION_IND
		AUC_PFOF
		DMMP_IND
		Abs_Delta
		INV_STOCK_MIDPOINT
		Underlying_Vol
		STOCK_QS
		GAMMA
		VEGA
		BUY_IND
		TICK_CHANGE_IND
		INV_OPTION_MIDPOINT
		CALL_IND
		LOG_DAYS_EXP
		LOG_S_VOL
		LOG_MCAP
		CHARDATE
		/ solution noint;
run;
quit;



/* Reduce the GLM fit statistics to a single table row: RSquare is exposed as
   "value", labelled parameter="Rsq", row type "E". */
data FitStatistics (keep=parameter value Type);
	set FitStatistics;

	parameter = "Rsq";
	type      = "E";

	/* parameter was just set to "Rsq", so this filter lets every row through. */
	if find(parameter, "SYMBOL_ONLY") = 0 and find(parameter, "CHARDATE") = 0;

	rename RSquare = value;
run;





/* Coefficient rows (Type="E"): Estimate is exposed as "value" and the
   parameter name is upper-cased. */
data ParameterEstimates_e (keep=parameter value Type);
	set ParameterEstimates;

	Type = "E";

	/* Discard every row whose name contains one of the excluded substrings
	   (the match is case-sensitive and made before upper-casing). */
	if find(parameter, "SYMBOL_ONLY") > 0
	   or find(parameter, "CHARDATE") > 0
	   or find(parameter, "sender") > 0
	   or find(parameter, "SENDER") > 0
	   or find(parameter, "PFOF_Model") > 0
	   or find(parameter, "DIRECTED_OF") > 0
	then delete;

	parameter = upcase(parameter);

	rename Estimate = value;
run;





/* Keep only the "Number of Observations Read" row and expose NObsRead as
   "value", labelled parameter="NObs", row type "E". */
data nobs(keep=value type parameter);
	set nobs;

	type = "E";

	/* Any row other than the "observations read" line is removed. */
	if label1 ne "Number of Observations Read" then delete;

	parameter = "NObs";

	rename NObsRead = value;
run;



/* Stack estimates, standard errors, R-square and observation count, and
   attach the row-order key n to each recognised parameter. Parameters not
   listed below keep n missing. */
data Regression_EQ;
	set ParameterEstimates_e parameterEstimates_s_clus FITSTATISTICS nobs;

	select (parameter);
		when ("AUCTION_IND")         n = 1;
		when ("PFOF_IND")            n = 2;
		when ("AUC_PFOF")            n = 3;
		when ("DMMP_IND")            n = 4;
		when ("ABS_DELTA")           n = 5;
		when ("GAMMA")               n = 6;
		when ("VEGA")                n = 7;
		when ("UNDERLYING_VOL")      n = 8;
		when ("STOCK_QS")            n = 9;
		when ("INV_OPTION_MIDPOINT") n = 10;
		when ("INV_STOCK_MIDPOINT")  n = 11;
		when ("LOG_MCAP")            n = 12;
		when ("LOG_S_VOL")           n = 13;
		when ("CALL_IND")            n = 14;
		when ("LOG_DAYS_EXP")        n = 15;
		when ("TICK_CHANGE_IND")     n = 16;
		when ("BUY_IND")             n = 17;
		when ("Rsq")                 n = 18;
		when ("NObs")                n = 19;
		otherwise;
	end;
run;


/* Attach the p-value (Probt) to every Regression_EQ row by parameter name.
   Rows with no match in ParameterEstimates_p_clus keep a missing Probt.
   NOTE(review): the table is both read and replaced by this statement; SAS is
   expected to log a recursive-reference warning here - confirm it is benign. */
proc sql;
	create table Regression_EQ as
	select res.*, pv.Probt
	from Regression_EQ as res
	left join ParameterEstimates_p_clus as pv
		on res.parameter = pv.parameter;
quit;







/* Put the five regression tables side by side. REGRESSION_QS drives the row
   set; each other table is left-joined on (PARAMETER, TYPE), so a model that
   lacks a given row contributes missing values for it. */
proc sql;
	create table Regression_Combined as
	select
		r_qs.VALUE   as VALUE_QS,    r_qs.PROBT   as PROBT_QS,    r_qs.N   as N_QS,    r_qs.PARAMETER   as PARAMETER_QS,    r_qs.TYPE   as TYPE_QS,
		r_es.VALUE   as VALUE_ES,    r_es.PROBT   as PROBT_ES,    r_es.N   as N_ES,    r_es.PARAMETER   as PARAMETER_ES,    r_es.TYPE   as TYPE_ES,
		r_pimp.VALUE as VALUE_PIMP,  r_pimp.PROBT as PROBT_PIMP,  r_pimp.N as N_PIMP,  r_pimp.PARAMETER as PARAMETER_PIMP,  r_pimp.TYPE as TYPE_PIMP,
		r_eq.VALUE   as VALUE_EQ,    r_eq.PROBT   as PROBT_EQ,    r_eq.N   as N_EQ,    r_eq.PARAMETER   as PARAMETER_EQ,    r_eq.TYPE   as TYPE_EQ,
		r_mr.VALUE   as VALUE_MMREV, r_mr.PROBT   as PROBT_MMREV, r_mr.N   as N_MMREV, r_mr.PARAMETER   as PARAMETER_MMREV, r_mr.TYPE   as TYPE_MMREV
	from REGRESSION_QS as r_qs
	left join REGRESSION_ES as r_es
		on r_qs.PARAMETER = r_es.PARAMETER and r_qs.TYPE = r_es.TYPE
	left join REGRESSION_PIMP_C as r_pimp
		on r_qs.PARAMETER = r_pimp.PARAMETER and r_qs.TYPE = r_pimp.TYPE
	left join REGRESSION_EQ as r_eq
		on r_qs.PARAMETER = r_eq.PARAMETER and r_qs.TYPE = r_eq.TYPE
	left join REGRESSION_MR as r_mr
		on r_qs.PARAMETER = r_mr.PARAMETER and r_qs.TYPE = r_mr.TYPE
	;
quit;



/* Turn the joined regression table into printable rows.
   Each input row is one parameter and one row type taken from TYPE_QS
   ("E" = estimate, "S" = standard error). The step adds:
     VARIABLE      - display label (LaTeX markup), blank on "S" rows;
     char_2dash    - the literal row terminator text;
     VALUE_CHAR_*  - one formatted text cell per regression:
                       "E" rows : estimate (8.3) plus a significance macro,
                       "S" rows : standard error (8.3) in parentheses,
                       Rsq/NObs : the plain number (8.3).                   */
data Regression_Combined;
SET Regression_Combined;

/* Declared up front so every text cell has the same, explicit width. */
length VARIABLE $48 char_2dash $12
       VALUE_CHAR_QS VALUE_CHAR_ES VALUE_CHAR_PIMP VALUE_CHAR_EQ VALUE_CHAR_MMREV $48;

/* Parallel views over the five regressions, so the cell formatting below is
   written once instead of once per model. Index 1 is the QS model. */
array est_val  {5}   VALUE_QS VALUE_ES VALUE_PIMP VALUE_EQ VALUE_MMREV;
array est_p    {5}   PROBT_QS PROBT_ES PROBT_PIMP PROBT_EQ PROBT_MMREV;
array est_type {5} $ TYPE_QS TYPE_ES TYPE_PIMP TYPE_EQ TYPE_MMREV;
array cell     {5} $ VALUE_CHAR_QS VALUE_CHAR_ES VALUE_CHAR_PIMP VALUE_CHAR_EQ VALUE_CHAR_MMREV;
drop _m;

/* Adjust two of the sort keys consumed by the later PROC SORT on n_QS. */
if PARAMETER_QS = "AUCTION_IND" then N_QS = 0.9;
if PARAMETER_QS = "AUC_PFOF"    then N_QS = 2.9;

/* Display labels are set on estimate rows only; every other row keeps the
   blank default (and "S" rows are blanked explicitly further down). */
if TYPE_QS = "E" then do;
	select (PARAMETER_QS);
		when ("PFOF_IND")                  VARIABLE = "PFOF";
		when ("AUCTION_IND")               VARIABLE = "Auction";
		when ("QUOTEDSPREAD_C")            VARIABLE = "Option Quoted Spread";
		when ("AUC_PFOF")                  VARIABLE = "Auction $\times$ PFOF";
		when ("DMMP_IND")                  VARIABLE = "DMMP";
		when ("VEGA")                      VARIABLE = "Vega";
		when ("ABS_DELTA")                 VARIABLE = "$\vert$Delta$\vert$";
		when ("GAMMA")                     VARIABLE = "Gamma";
		when ("TICK_CHANGE_IND")           VARIABLE = "Tick Size";
		when ("INV_OPTION_MIDPOINT")       VARIABLE = "1/Option Midpoint";
		when ("INV_STOCK_MIDPOINT")        VARIABLE = "1/Underlying Midpoint";
		when ("CALL_IND")                  VARIABLE = "Call";
		when ("STOCK_QS")                  VARIABLE = "Underlying Quoted Spr.";
		when ("LOG_S_VOL", "LOG_SIZE_VOL") VARIABLE = "Trade size";
		when ("BUY_IND")                   VARIABLE = "Buy";
		when ("ARBITRAGE")                 VARIABLE = "Arbitrage$_{t-1}$";
		when ("LOG_DAYS_EXP")              VARIABLE = "Days-to-Expiry";
		when ("UNDERLYING_VOL")            VARIABLE = "Underlying Volatility$_{t-1}$";
		when ("LOG_MCAP")                  VARIABLE = "Underlying MCAP";
		when ("Rsq")                       VARIABLE = "Rsq";
		otherwise;
	end;
end;

/* The observation-count row is labelled whatever its row type. */
if PARAMETER_QS = "NObs" then VARIABLE = "NObs";

char_2dash = "\\";

/* Estimate cells: number plus significance macro (1%, 5%, 10%).
   - The QS cell is always formatted; the other models only when a value was
     joined in for this row.
   - A missing p-value gets no stars: SAS orders missing below every number,
     so without the explicit check it would pass the "< 0.01" test.
   - The final ELSE covers every p-value >= 0.1, including exactly 0.1. */
if VARIABLE not in ("Rsq","NObs") then do _m = 1 to 5;
	if est_type{_m} = "E" and (_m = 1 or not missing(est_val{_m})) then do;
		if missing(est_p{_m})    then cell{_m} = put(est_val{_m}, 8.3);
		else if est_p{_m} < 0.01 then cell{_m} = cats(put(est_val{_m}, 8.3), "\threeS");
		else if est_p{_m} < 0.05 then cell{_m} = cats(put(est_val{_m}, 8.3), "\twoS");
		else if est_p{_m} < 0.1  then cell{_m} = cats(put(est_val{_m}, 8.3), "\oneS");
		else                          cell{_m} = put(est_val{_m}, 8.3);
	end;
end;

/* Standard-error rows: no label, value in parentheses where one exists. */
if TYPE_QS = "S" then do;
	VARIABLE = " ";
	do _m = 1 to 5;
		if not missing(est_val{_m}) then
			cell{_m} = cats("(", put(est_val{_m}, 8.3), ")");
	end;
end;

/* Summary rows (R-square, observation count): plain number, no decoration. */
if VARIABLE in ("Rsq","NObs") then do _m = 1 to 5;
	cell{_m} = put(est_val{_m}, 8.3);
end;

RUN;


/* Order the printable rows by the QS sort key, then by row type. */
proc sort data=Regression_Combined;
	by n_QS TYPE_QS;
run;


/* Switch ODS output back on for the report, with graphics still off. */
ods select all;
ods results;
ods graphics off;
/*=========================================*/
ods title "Appendix TABLE A4 - PANEL B";

/* Print the label column followed by one text column per regression. */
proc report data=Regression_Combined;
	columns
		variable
		value_char_QS
		value_char_ES
		value_char_PIMP
		value_char_EQ
		value_char_MMREV;

	define variable         / display "Variable"              format=$168.;
	define value_char_QS    / display "Quoted Spread"         format=$168.;
	define value_char_ES    / display "Effective Spread"      format=$168.;
	define value_char_PIMP  / display "Price Improvement"     format=$168.;
	define value_char_EQ    / display "EQ"                    format=$168.;
	define value_char_MMREV / display "Market Maker Revenues" format=$168.;
run;


/*=========================================*/
/* Silence ODS output again and blank the title. */
ods graphics off;
ods noresults;
ods select none;
ods title " ";

/* Point the SAS log back at its default destination. */
proc printto log=log;
run;
